# Coverage for sparc/docparser.py: 83%
# 380 statements
# coverage.py v7.9.1, created at 2025-06-18 16:19 +0000
# -*- coding: utf-8 -*-
"""
A module to parse the LaTeX documents provided by SPARC
and convert them to its Python API

Created on Wed Mar 1 15:32:31 EST 2023

Tian Tian (alchem0x2a@gmail.com)
"""
import json
import re
from copy import copy
from datetime import datetime
from pathlib import Path
from warnings import warn

import numpy as np
# Some fields in the master SPARC doc make the automatic type detection
# fail; the overrides below are merged into the parsed entry of the named
# parameter by `SparcDocParser.postprocess`.
postprocess_items = {
    "RELAX_FLAG": {"allow_bool_input": False},
    "NPT_SCALE_CONSTRAINTS": {"type": "string"},
    "NPT_SCALE_VECS": {"type": "integer array"},
    "TOL_POISSON": {"type": "double"},
}

# Default repository cloned by `SparcDocParser.json_from_repo`
sparc_repo_url = "https://github.com/SPARC-X/SPARC.git"
class SparcDocParser(object):
    """Parse the LaTeX documentation of SPARC-X and convert it into a Python API.

    This class extracts parameter information from LaTeX source files,
    organizing it into a structured format that can be easily used in
    Python. It supports parsing of version details, parameter types,
    units, and other relevant information.

    Attributes:
        version (str): Parsed SPARC version, or None if it was not parsed.
        parameter_categories (list): Categories of parameters extracted.
        parameters (dict): Extracted parameters with detailed information.
        other_parameters (dict): Additional parameters not categorized.
        suppress_warnings (bool): Whether the doc parser suppresses UserWarning
                                  (may be annoying during class import)

    Methods:
        find_main_file(main_file_pattern): Finds the main LaTeX file based on a pattern.
        get_include_files(): Retrieves a list of included LaTeX files.
        parse_version(parse): Parses and sets the SPARC version.
        parse_parameters(): Extracts parameters from LaTeX files.
        postprocess(): Applies hard-coded post-processing to some parameters.
        to_dict(): Converts parsed information into a dictionary.
        json_from_directory(directory, include_subdirs, **kwargs): Class method to create JSON from a directory.
        json_from_repo(url, version, include_subdirs, **kwargs): Class method to create JSON from a repository.
    """

    def __init__(
        self,
        directory=".",
        main_file="*Manual.tex",
        intro_file="Introduction.tex",
        params_from_intro=True,
        parse_version=True,
        suppress_warnings=True,
    ):
        """Create the doc parser pointing to the root of the doc file of SPARC

        The SPARC doc is organized as follows:
        SPARC/doc/.LaTeX/
            |---- Manual.tex
                  |---- Introduction.tex
                        |---- {Section}.tex

        For parameters additional to the standard SPARC options, such as the SQ / cyclix
        options, we merge the dict from the sub-dirs

        Args:
            directory: root directory to the LaTeX files, may look like `SPARC/doc/.LaTeX`
            main_file: glob pattern of the main LaTeX file for the manual
            intro_file: LaTeX file for the introduction
            params_from_intro: stored on the instance as `params_from_intro`
            parse_version: whether to parse the SPARC version (by date) from the C source
            suppress_warnings: whether to silence any warnings generated by the parser

        Raises:
            FileNotFoundError: If the main file cannot be uniquely located or
                               the introduction file is missing.
        """
        self.suppress_warnings = suppress_warnings
        self.params_from_intro = params_from_intro
        self.root = Path(directory)
        self.intro_file = self.root / intro_file
        self.main_file = self.find_main_file(main_file)
        if not self.intro_file.is_file():
            raise FileNotFoundError(f"Introduction file {intro_file} is missing!")
        self.include_files = self.get_include_files()
        self.parse_version(parse_version)
        self.parse_parameters()
        self.postprocess()

    def _warn(self, message):
        """Emit `message` as a warning unless `suppress_warnings` is set."""
        if not self.suppress_warnings:
            # stacklevel=2 attributes the warning to the calling method
            warn(message, stacklevel=2)

    def find_main_file(self, main_file_pattern):
        """
        Finds the main LaTeX file that matches the given pattern, e.g. Manual.tex or Manual_cyclix.tex

        Args:
            main_file_pattern (str): Pattern to match the main LaTeX file name.

        Returns:
            Path: Path to the main LaTeX file.

        Raises:
            FileNotFoundError: If no or multiple files match the pattern.
        """
        candidates = list(self.root.glob(main_file_pattern))
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"Main file {main_file_pattern} is missing or more than 1 exists!"
            )
        return candidates[0]

    def get_include_files(self):
        """
        Retrieves a list of LaTeX files included in the main LaTeX document, e.g. Manual.tex.

        Returns:
            list: A list of paths to the included LaTeX files that exist on disk.

        Raises:
            IndexError: If the main file has no document environment.
        """
        pattern = r"\\begin\{document\}(.*?)\\end\{document\}"
        # read_text closes the file handle, unlike a bare open().read()
        text = self.main_file.read_text(encoding="utf8")
        documents = re.findall(pattern, text, re.DOTALL)
        if not documents:
            raise IndexError(
                f"No document environment found in main file {self.main_file}"
            )
        # Only the first begin/end document will be matched
        body = documents[0]
        pattern_include = r"\\include\{(.+?)\}"
        include_files = []
        for name in re.findall(pattern_include, body, re.DOTALL):
            tex_file = self.root / f"{name}.tex"
            if tex_file.is_file():
                include_files.append(tex_file)
            else:
                self._warn(
                    f"TeX file {tex_file} is missing! It may be a typo in the document, "
                    "ignore parameters from this file."
                )
        return include_files

    def parse_version(self, parse=True):
        """
        Parses and sets the SPARC version based on the C-source file, if possible.
        The date for the SPARC code is parsed from initialization.c and stored in
        the "YYYY.MM.DD" format.

        Args:
            parse (bool): Whether to parse the version at all.

        Sets:
            self.version (str): The parsed version in 'YYYY.MM.DD' format or None,
                                if either parse is falsy, or the C-source code is missing

        Raises:
            ValueError: If a version string is found but is not a recognized date.
        """
        self.version = None
        if not parse:
            return
        # The C source lives two levels above the LaTeX root (<root>/../../src).
        # Anchor the path at the working directory first: a shallow relative
        # root such as "." has no lexical grandparent.
        try:
            init_c = self.root.absolute().parents[1] / "src" / "initialization.c"
        except IndexError:
            init_c = None
        if init_c is None or not init_c.is_file():
            self._warn(
                'Cannot find the c source file "initialization.c", skip version parsing!'
            )
            return
        text = init_c.read_text(encoding="utf8")
        pattern_version = r"SPARC\s+\(\s*?version(.*?)\)"
        found = re.findall(pattern_version, text)
        if len(found) != 1:
            self._warn(
                'Parsing c source file "initialization.c" for version is unsuccessful!'
            )
            return
        # Drop commas and collapse whitespace so "Sep 20,  2023" -> "Sep 20 2023"
        date_str = re.sub(r"\s+", " ", found[0].strip().replace(",", " "))
        # Older version of SPARC doc may contain abbreviated month format
        date_version = None
        for fmt in ("%b %d %Y", "%B %d %Y"):
            try:
                date_version = datetime.strptime(date_str, fmt).strftime("%Y.%m.%d")
                break
            except ValueError:
                continue
        if date_version is None:
            raise ValueError(f"Cannot parse date time {date_str}")
        self.version = date_version
        return

    def __parse_parameter_from_frame(self, frame):
        """Parse the parameters from a single LaTeX frame

        Args:
            frame (str): a string containing the LaTeX frame (e.g. \\begin{frame} ... \\end{frame})

        Returns:
            dict: a key-value paired dict parsed from the frame, or an empty dict
                  when the frame is not a structured parameter frame or the
                  parameter is commented out. Some field names include:
                name: TOL_POISSON
                type: Double | Integer | String | Character | Double array
                unit: specified in the doc
        """
        pattern_label = r"\\texttt\{(.*?)\}.*?\\label\{(.*?)\}"
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        match_label = re.findall(pattern_label, frame, re.DOTALL | re.MULTILINE)
        if len(match_label) != 1:
            self._warn("Provided a non-structured frame for parsing, skip.")
            return {}
        raw_symbol, raw_label = match_label[0]
        symbol = convert_tex_parameter(raw_symbol.strip())
        label = raw_label.strip()
        param_dict = {"symbol": symbol, "label": label}
        # Every match contains the (name, content) pair of the blocks
        # TODO: add more type definition
        for key, content in re.findall(
            pattern_block, frame, re.DOTALL | re.MULTILINE
        ):
            key = key.lower()
            content = content.strip()
            # Do not parse commented-out values
            if key == "type" and content.startswith("%"):
                self._warn(f"Parameter {symbol} is disabled in the doc, ignore!")
                return {}
            if key == "example":
                content = convert_tex_example(content)
            param_dict[key] = content
        # Sanitize 1: Convert types
        param_dict = sanitize_type(param_dict, suppress_warnings=self.suppress_warnings)
        # Sanitize 2: Convert default values
        param_dict = sanitize_default(
            param_dict, suppress_warnings=self.suppress_warnings
        )
        # Sanitize 3: Remove TeX components in description and remark
        param_dict = sanitize_description(param_dict)
        return param_dict

    def __parse_frames_from_text(self, text):
        """Extract the contents of every Beamer frame found in the text

        Arguments:
            text (str): Full LaTeX text
        Returns:
            list: Matched LaTeX Beamer frame fragments
        """
        pattern_frame = r"\\begin\{frame\}(.*?)\\end\{frame\}"
        return re.findall(pattern_frame, text, re.DOTALL | re.MULTILINE)

    def __parse_intro_file(self):
        """Parse the introduction file

        Returns:
            parameter_categories (list): list of categories
                                         (following order in Introduction.tex)
            parameter_dict (dict): dictionary using the parameter category as the main key,
                                   each value a list of {"label", "symbol"} dicts

        Raises:
            ValueError: If a category appears more than once.
            IndexError: If no "Input file options" frame is found.
        """
        text_intro = self.intro_file.read_text(encoding="utf8")
        pattern_params = (
            r"^\\begin\{frame\}.*?\{Input file options\}.*?$(.*?)\\end\{frame\}"
        )
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        pattern_line = r"\\hyperlink\{(.*?)\}{\\texttt\{(.*?)\}\}"
        text_params = re.findall(pattern_params, text_intro, re.DOTALL | re.MULTILINE)[
            0
        ]
        parameter_categories = []
        parameter_dict = {}
        for title, block_body in re.findall(pattern_block, text_params):
            cat = title.lower()
            if cat in parameter_categories:
                raise ValueError(
                    f"Key {cat} already exists! You might have a wrong LaTeX doc file!"
                )
            parameter_categories.append(cat)
            parameter_dict[cat] = []
            for line in block_body.split("\n"):
                # Each match contains 2 items: the "link" that matches a reference
                # in the included tex files, and the actual symbol name (in text
                # format). In most cases the link and symbol should be the same.
                for link, tex_symbol in re.findall(pattern_line, line):
                    parameter_dict[cat].append(
                        {
                            "label": link.strip(),
                            "symbol": convert_tex_parameter(tex_symbol.strip()),
                        }
                    )
        return parameter_categories, parameter_dict

    def __parse_all_included_files(self):
        """Collect all known parameters from included files

        Returns:
            dict: All known parameters from included files, keyed by their label
        """
        all_params = {}
        intro_resolved = self.intro_file.resolve()
        for tex_file in self.include_files:
            # Do not parse intro file since it's waste of time
            if tex_file.resolve() == intro_resolved:
                continue
            text = tex_file.read_text(encoding="utf8")
            for frame in self.__parse_frames_from_text(text):
                details = self.__parse_parameter_from_frame(frame)
                if len(details) > 0:
                    all_params[details["label"]] = details
        return all_params

    def parse_parameters(self):
        """The actual thing for parsing parameters

        Sets:
            parameters (dict): Parsed parameters that are listed in the intro file
            parameter_categories (list): List of categories
            other_parameters (dict): Any parameters that are not included in the categories
        """
        parameter_categories, parameter_dict = self.__parse_intro_file()
        all_params = self.__parse_all_included_files()
        self.parameter_categories = parameter_categories
        # parameters contain only the "valid" ones that are shown in the intro
        # all others are clustered in "other_parameters"
        self.parameters = {}
        for cat, params in parameter_dict.items():
            for entry in params:
                param_details = all_params.pop(entry["label"], {})
                if param_details != {}:
                    param_details["category"] = cat
                    self.parameters[entry["symbol"]] = param_details

        # Whatever was not claimed by the intro file is kept under its own symbol
        self.other_parameters = {
            details["symbol"]: details for details in all_params.values()
        }
        return

    def postprocess(self):
        """Use the hardcoded dict postprocess_items to fix some issues"""
        for param, fix in postprocess_items.items():
            if param in self.parameters:
                self.parameters[param].update(fix)
        return

    def to_dict(self):
        """Output a json dict from current document parser

        Returns:
            dict: All API schemes in dict
        """
        return {
            "sparc_version": self.version,
            "categories": self.parameter_categories,
            "parameters": dict(sorted(self.parameters.items())),
            "other_parameters": dict(sorted(self.other_parameters.items())),
            "data_types": sorted({p["type"] for p in self.parameters.values()}),
        }

    @classmethod
    def json_from_directory(cls, directory=".", include_subdirs=True, **kwargs):
        """
        Recursively add parameters from all Manual files
        Arguments:
            directory (str or PosixPath): The directory to the LaTeX files, e.g. <sparc-root>/doc/.LaTeX
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
            **kwargs: Forwarded to the parser of `directory` only; sub-directory
                      parsers use the defaults with parse_version=False
        Returns:
            str: Formatted json-string of the API
        """
        directory = Path(directory)
        root_dict = cls(directory=directory, **kwargs).to_dict()
        if include_subdirs:
            # Visit each sub-directory once and in sorted order, so the
            # "first definition wins" merge below is reproducible
            subdirs = sorted({p.parent for p in directory.glob("*/*Manual.tex")})
            for subdir in subdirs:
                try:
                    sub_dict = cls(directory=subdir, parse_version=False).to_dict()
                except FileNotFoundError:
                    print(
                        subdir,
                        " Latex files not found. Check naming conventions for Manual.tex. Expects format *Manual.tex",
                    )
                    continue
                # Root definitions take precedence over sub-directory ones
                for param, param_desc in sub_dict["parameters"].items():
                    root_dict["parameters"].setdefault(param, param_desc)
                # Combine the subdir categories
                for sub_category in sub_dict["categories"]:
                    if sub_category not in root_dict["categories"]:
                        root_dict["categories"].append(sub_category)
                # Combine data types
                for sub_dt in sub_dict["data_types"]:
                    if sub_dt not in root_dict["data_types"]:
                        root_dict["data_types"].append(sub_dt)

        # indent=True is kept as-is: json treats it as an indent of 1
        return json.dumps(root_dict, indent=True)

    @classmethod
    def json_from_repo(
        cls, url=sparc_repo_url, version="master", include_subdirs=True, **kwargs
    ):
        """
        Download the source code from git and use json_from_directory to parse
        Arguments:
            url (str): URL for the repository of SPARC, default is "https://github.com/SPARC-X/SPARC.git"
            version (str): Git version or commit hash of the SPARC repo
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
        Returns:
            str: Formatted json-string of the API
        Raises:
            subprocess.CalledProcessError: If any git command fails.
        """
        import tempfile
        from subprocess import run

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            download_dir = tmpdir / "SPARC"
            # check=True: fail loudly rather than parsing a missing or wrong checkout
            run(
                ["git", "clone", "--depth", "1", str(url), "SPARC"],
                cwd=tmpdir,
                check=True,
            )
            if version not in ["master", "HEAD"]:
                # git fetch expects <repository> before <refspec>; without
                # "origin" the version would be taken as the remote name.
                run(
                    ["git", "fetch", "--depth", "1", "origin", str(version)],
                    cwd=download_dir,
                    check=True,
                )
                # A plain fetch of one ref only updates FETCH_HEAD, so check
                # that out instead of a local name that may not exist.
                run(["git", "checkout", "FETCH_HEAD"], cwd=download_dir, check=True)
            json_string = cls.json_from_directory(
                directory=download_dir / "doc" / ".LaTeX",
                include_subdirs=include_subdirs,
                **kwargs,
            )
        return json_string
def convert_tex_parameter(text):
    """
    Convert a TeX string to non-escaped name (for parameter only)
    Arguments:
        text (str): Parameter name in LaTeX format
    Returns:
        str: Stripped text with every TeX-escaped underscore replaced by "_"
    """
    # "\\_" is the two-character TeX escape; the unescaped "\_" spelling is an
    # invalid Python escape sequence
    return text.strip().replace("\\_", "_")
def convert_tex_example(text):
    """Convert TeX codes of examples as much as possible
    The examples follow the format
    SYMBOL: values (may contain new lines)
    Arguments:
        text (str): Single or multiline LaTeX contents
    Returns:
        str: Sanitized literal text in the form "SYMBOL: values". If the text
             contains no colon, the sanitized text is returned on its own.
    """
    # Order matters: strip the \texttt{ wrapper and the escaped underscores
    # before every remaining backslash is turned into a line break
    replacements = (
        ("\\texttt{", ""),
        ("\\_", "_"),
        ("}", ""),
        ("\\", "\n"),
    )

    def _clean(chunk):
        # Collapse blank lines, then remove all comment lines
        chunk = re.sub("\n+", "\n", chunk.strip())
        return "\n".join(
            line for line in chunk.splitlines() if not line.lstrip().startswith("%")
        )

    new_text = text
    for old, new in replacements:
        new_text = new_text.replace(old, new)
    symbol, colon, values = new_text.partition(":")
    if not colon:
        # No "SYMBOL:" prefix to split on; keep the cleaned text as-is
        return _clean(new_text)
    return f"{symbol.strip()}: {_clean(values)}"
def convert_tex_default(text, desired_type=None, suppress_warnings=False):
    """Convert default values as much as possible.
    The desired type will convert the default values
    to the closest format

    Currently supported conversions
    1. Remove all surrounding text modifiers (texttt)
    2. Remove all symbol wrappers $
    3. Convert value to single or array

    Arguments:
        text (str): Raw text string for value
        desired_type (str or None): Data type to be converted to. If None, preserve the string format
        suppress_warnings (bool): Passed on to `text2value`

    Returns:
        converted: None when the text mentions "none" or "no default", "auto"
                   when it mentions "automat", the cleaned text for string /
                   unspecified types, otherwise the result of `text2value`
    """
    # Applied in order. The escaped underscore is listed once: "\\_" and the
    # invalid-escape spelling "\_" are the same two characters.
    replacements = (
        ("\\texttt{", ""),
        ("}", ""),
        ("{", ""),
        ("\\_", "_"),
        ("\\\\", "\n"),
        ("$", ""),
    )
    text = text.strip()
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for old, new in replacements:
        text = text.replace(old, new)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    text = "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )

    lowered = text.lower()
    if "none" in lowered or "no default" in lowered:
        return None
    if "automat" in lowered:
        return "auto"
    if desired_type is None or desired_type == "string":
        return text
    # try type conversion
    return text2value(text, desired_type, suppress_warnings=suppress_warnings)
def convert_comment(text):
    """Used to remove TeX-specific commands in description and remarks
    as much as possible

    Arguments:
        text (str): Raw LaTeX code for the comment section in manual

    Returns:
        str: Sanitized plain text
    """
    # Applied in order. The escaped underscore is listed once: "\\_" and the
    # invalid-escape spelling "\_" are the same two characters.
    replacements = (
        ("\\texttt{", ""),
        ("}", ""),
        ("{", ""),
        ("\\_", "_"),
        ("\\\\", "\n"),
        ("$", ""),
    )
    text = text.strip()
    # Drop link targets, keeping the bracketed link text that follows them
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\href\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for old, new in replacements:
        text = text.replace(old, new)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    return "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )
def text2value(text, desired_type, suppress_warnings=False):
    """Convert raw text to a desired type

    Arguments:
        text (str): Text contents for the value
        desired_type (str): Target data type from 'string', 'integer',
                            'integer array', 'double', 'double array',
                            'bool', 'bool array'
        suppress_warnings (bool): Suppress UserWarning if overwhelming for end-users
    Returns:
        converted: Value converted to the desired type. None when the text is
                   not fully numeric, the cast fails, or the type is not one
                   of the names above.
    """
    from contextlib import suppress

    if desired_type is None:
        return text
    desired_type = desired_type.lower()
    if desired_type == "string":
        return text.strip()

    # Fields are split on single spaces; anything non-numeric becomes NaN
    try:
        arr = np.genfromtxt(text.splitlines(), delimiter=" ", dtype=float)
        if np.isnan(arr).any():
            if not suppress_warnings:
                warn(
                    f"Some fields in {text} cannot converted to a numerical array, will skip conversion."
                )
            arr = None
    except Exception as e:
        if not suppress_warnings:
            warn(
                f"Cannot transform {text} to array, skip converting. Error message is:\n {e}"
            )
        arr = None

    if arr is None:
        return None

    # Upshape ndarray to at least 1D
    if arr.shape == ():
        arr = np.reshape(arr, [1])

    scalar_casts = {"integer": int, "bool": bool, "double": float}
    array_dtypes = {"integer array": int, "bool array": bool, "double array": float}

    converted = None
    # Ignore all failures and make conversion None
    with suppress(Exception):
        if desired_type in scalar_casts:
            converted = scalar_casts[desired_type](arr[0])
        elif desired_type in array_dtypes:
            converted = arr.astype(array_dtypes[desired_type]).tolist()
    return converted
def is_array(text):
    """Tell whether a string holds more than one numeric value.

    The text is parsed as whitespace-separated numbers with numpy. It is only
    used to tell a single float / int value from an array.

    Arguments:
        text (str): Text that should contain one or more numbers
    Returns:
        bool: True if more than one number was parsed, False otherwise
              (including when nothing could be parsed)
    """
    values = np.fromstring(text, sep=" ")
    # "> 1" rather than "!= 1": an empty parse result is not an array
    return len(values) > 1
def contain_only_bool(text):
    """Check that every number in a string is 0 or 1.

    Text containing any of ``. + - e E`` is rejected outright, which rules out
    floats, signed and exponent notation. Otherwise every run of digits must
    read as 0 or 1. Characters other than digits are not inspected, so text
    without any digit (including the empty string) returns True.

    Arguments:
        text (str): Text to inspect
    Returns:
        bool: True if no digit run differs from 0 / 1
    """
    if any(c in text for c in (".", "+", "-", "e", "E")):
        return False
    digit_runs = re.findall(r"[-+e\d]+", text, re.DOTALL)
    return all(int(run) in (0, 1) for run in digit_runs)
def sanitize_description(param_dict):
    """Sanitize the description and remark field

    Arguments:
        param_dict (dict): Raw dict for one parameter entry. Must contain
                           "description"; "remark" is optional.

    Returns:
        dict: Copy of the parameter dict with "description" and "remark"
              converted to human-readable formats by `convert_comment`, and
              the original texts kept under "description_raw" / "remark_raw"

    Raises:
        KeyError: If the entry has no "description" field.
    """
    sanitized_dict = param_dict.copy()

    original_desc = sanitized_dict["description"]
    # A missing remark is treated as empty text
    original_remark = sanitized_dict.get("remark", "")

    sanitized_dict["description_raw"] = original_desc
    sanitized_dict["remark_raw"] = original_remark
    sanitized_dict["description"] = convert_comment(original_desc)
    sanitized_dict["remark"] = convert_comment(original_remark)
    return sanitized_dict
def sanitize_default(param_dict, suppress_warnings=False):
    """Sanitize the default field
    1. Create an extra field `default_remark` that copies original default
    2. Use `convert_tex_default` to convert values as much as possible

    This function should be called after sanitize_type, since the entry's
    "type" decides how the default is converted.

    Arguments:
        param_dict (dict): Parameter entry containing "default" and "type"
        suppress_warnings (bool): Passed on to `convert_tex_default`

    Returns:
        dict: Copy of the entry with the converted "default" and the original
              text under "default_remark"

    Raises:
        KeyError: If the entry has no "default" or "type" field.
    """
    sanitized_dict = param_dict.copy()
    original_default = sanitized_dict["default"]
    sanitized_dict["default_remark"] = original_default
    sanitized_dict["default"] = convert_tex_default(
        original_default, param_dict["type"], suppress_warnings=suppress_warnings
    )
    return sanitized_dict
def sanitize_type(param_dict, suppress_warnings=False):
    """Sanitize the param dict so that the type are more consistent

    For example, if type is Double / Integer,
    but parameter is a vector,
    make a double vector or integer vector

    Arguments:
        param_dict (dict): Parameter entry; must contain "symbol". "type",
                           "example" and "default" are used when present.
        suppress_warnings (bool): Silence the UserWarnings raised on
                                  non-standard entries

    Returns:
        dict: Copy of the entry with a normalized "type" and an
              "allow_bool_input" flag. An entry without "type" is returned
              as an unchanged copy.
    """
    sanitized_dict = param_dict.copy()
    symbol = param_dict["symbol"]
    origin_type = param_dict.get("type", None)
    if origin_type is None:
        print("Dict does not have type!")
        return sanitized_dict
    origin_type = origin_type.lower()

    sanitized_type = None
    sanitized_dict["allow_bool_input"] = False
    # First pass, remove all singular types
    if origin_type == "0 or 1":
        # Deliberately left unsanitized so that pass 2 treats it as an integer
        origin_type = "integer"
    elif "permutation" in origin_type:
        sanitized_type = "integer"
    elif origin_type in ("string", "character"):
        sanitized_type = "string"
    elif "array" in origin_type:
        sanitized_type = origin_type

    # Pass 2, test if int values are arrays
    if origin_type in ("int", "integer", "double") and sanitized_type is None:
        if "int" in origin_type:
            origin_type = "integer"
        # Test if the value from example is a single value or array
        try:
            example_value = param_dict["example"].split(":")[1]
            default = param_dict["default"]
            _array_test = is_array(example_value)
            _bool_test = contain_only_bool(example_value) and contain_only_bool(default)
        except Exception as e:
            if not suppress_warnings:
                # Report the raw example: the parsed value is unbound when
                # the lookup or the split itself was what failed
                raw_example = param_dict.get("example")
                warn(
                    f"Array conversion failed for example {raw_example!r} of {symbol}, ignore. "
                    f"The error is {e!r}"
                )
            _array_test = False  # Retain
            _bool_test = False

        if _array_test is True:
            sanitized_type = f"{origin_type} array"
        else:
            sanitized_type = origin_type

        # Pass 3: int to boolean test. This should be done very tight
        if _bool_test and ("integer" in sanitized_type):
            sanitized_dict["allow_bool_input"] = True

    if sanitized_type is None:
        # Currently there is only one NPT_NH_QMASS has this type
        # TODO: think of a way to format a mixed array?
        if not suppress_warnings:
            warn(f"Type of {symbol} if not standard digit or array, mark as others.")
        sanitized_type = "other"
    # TODO: how about provide a true / false type?
    sanitized_dict["type"] = sanitized_type
    return sanitized_dict
if __name__ == "__main__":
    # Run the module as independent script to extract a json-formatted parameter list
    from argparse import ArgumentParser

    argp = ArgumentParser(description="Parse the LaTeX doc to json")
    argp.add_argument(
        "-o",
        "--output",
        default="parameters.json",
        help="Output file name (json-formatted)",
    )
    argp.add_argument(
        "--include-subdirs",
        action="store_true",
        help="Parse manual parameters from subdirs",
    )
    argp.add_argument("--git", action="store_true")
    argp.add_argument(
        "--version",
        default="master",
        help="Version of the doc. Only works when using git repo",
    )
    argp.add_argument(
        "root",
        nargs="?",
        help=(
            "Root of the SPARC doc LaTeX files, or remote git repo link. If not provided and --git is enables, use the default github repo"
        ),
    )

    args = argp.parse_args()
    # The output always gets a .json suffix, replacing any other extension
    output = Path(args.output).with_suffix(".json")
    if args.git:
        # Without an explicit link, fall back to the default repository
        root = sparc_repo_url if args.root is None else args.root
        json_string = SparcDocParser.json_from_repo(
            url=root, version=args.version, include_subdirs=args.include_subdirs
        )
    else:
        if args.root is None:
            # Exit with a usage error instead of a TypeError from Path(None)
            argp.error("root is required unless --git is given")
        json_string = SparcDocParser.json_from_directory(
            directory=Path(args.root), include_subdirs=args.include_subdirs
        )
    with open(output, "w", encoding="utf8") as fd:
        fd.write(json_string)
    print(f"SPARC parameter specifications written to {output}!")
    print("If you need to finetune the definitions, please edit them manually.")