Coverage for sparc/docparser.py: 83%
369 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-12 01:13 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-12 01:13 +0000
1# -*- coding: utf-8 -*-
2"""
3A module to parse the latex documents provided by SPARC
4and convert to its Python API
6Created on Wed Mar 1 15:32:31 EST 2023
8Tian Tian (alchem0x2a@gmail.com)
9"""
10import json
11import re
12from copy import copy
13from datetime import datetime
14from pathlib import Path
15from warnings import warn
17import numpy as np
# Some fields in master SPARC doc may cause auto type detection
# to fail, need hard-coded post-processing for now.
# Maps a parameter symbol to the fields that overwrite the automatically
# parsed entry; SparcDocParser.postprocess() merges them with dict.update().
postprocess_items = {
    "RELAX_FLAG": {"allow_bool_input": False},
    "NPT_SCALE_CONSTRAINTS": {"type": "string"},
    "NPT_SCALE_VECS": {"type": "integer array"},
    "TOL_POISSON": {"type": "double"},
}

# Default repository URL used by SparcDocParser.json_from_repo()
sparc_repo_url = "https://github.com/SPARC-X/SPARC.git"
class SparcDocParser(object):
    """Parses LaTeX documentation of SPARC-X and converts it into a Python API.

    This class extracts parameter information from LaTeX source files,
    organizing it into a structured format that can be easily used in
    Python. It supports parsing of version details, parameter types,
    units, and other relevant information.

    Attributes:
        version (str): Parsed SPARC version, based on the documentation.
        parameter_categories (list): Categories of parameters extracted.
        parameters (dict): Extracted parameters with detailed information.
        other_parameters (dict): Additional parameters not categorized.

    Methods:
        find_main_file(main_file_pattern): Finds the main LaTeX file based on a pattern.
        get_include_files(): Retrieves a list of included LaTeX files.
        parse_version(parse): Parses and sets the SPARC version.
        parse_parameters(): Extracts parameters from LaTeX files.
        postprocess(): Applies hard-coded post-processing to some parameters.
        to_dict(): Converts parsed information into a dictionary.
        json_from_directory(directory, include_subdirs, **kwargs): Class method to create JSON from a directory.
        json_from_repo(url, version, include_subdirs, **kwargs): Class method to create JSON from a repository.

    """

    def __init__(
        self,
        directory=".",
        main_file="*Manual.tex",
        intro_file="Introduction.tex",
        params_from_intro=True,
        parse_version=True,
    ):
        """Create the doc parser pointing to the root of the doc file of SPARC

        The SPARC doc is organized as follows:
        SPARC/doc/.LaTeX/
            |---- Manual.tex
            |---- Introduction.tex
            |---- {Section}.tex

        For parameters additional to the standard SPARC options, such as the SQ / cyclix
        options, we merge the dict from the sub-dirs

        Args:
            directory: root directory to the LaTeX files, may look like `SPARC/doc/.LaTeX`
            main_file: glob pattern matching the main LaTeX file for the manual
            intro_file: LaTeX file for the introduction (relative to `directory`)
            params_from_intro: stored on the instance as `self.params_from_intro`;
                no method of this class reads it
            parse_version: if True, try to get the SPARC version (by date) from the C source

        Raises:
            FileNotFoundError: if the main file or the introduction file cannot be found
        """
        self.root = Path(directory)
        self.main_file = self.find_main_file(main_file)
        self.intro_file = self.root / intro_file
        if not self.intro_file.is_file():
            raise FileNotFoundError(f"Introduction file {intro_file} is missing!")
        self.include_files = self.get_include_files()
        self.params_from_intro = params_from_intro
        self.parse_version(parse_version)
        self.parse_parameters()
        self.postprocess()

    def find_main_file(self, main_file_pattern):
        """
        Finds the main LaTeX file that matches the given pattern, e.g. Manual.tex or Manual_cyclix.tex

        Args:
            main_file_pattern (str): Pattern to match the main LaTeX file name.

        Returns:
            Path: Path to the main LaTeX file.

        Raises:
            FileNotFoundError: If no or multiple files match the pattern.
        """
        candidates = list(self.root.glob(main_file_pattern))
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"Main file {main_file_pattern} is missing or more than 1 exists!"
            )
        return candidates[0]

    def get_include_files(self):
        """
        Retrieves a list of LaTeX files included in the main LaTeX document, e.g. Manual.tex.

        Included names whose `.tex` file does not exist under the root directory
        are skipped with a warning.

        Returns:
            list: A list of paths to the included LaTeX files.

        Raises:
            IndexError: If the main file has no document environment.
        """
        pattern = r"\\begin\{document\}(.*?)\\end\{document\}"
        # read_text closes the file handle, unlike a bare open(...).read()
        text = self.main_file.read_text(encoding="utf8")
        # Only the first begin/end document will be matched
        match = re.findall(pattern, text, re.DOTALL)[0]
        pattern_include = r"\\include\{(.+?)\}"
        include = re.findall(pattern_include, match, re.DOTALL)
        include_files = []
        for name in include:
            tex_file = self.root / f"{name}.tex"
            if tex_file.is_file():
                include_files.append(tex_file)
            else:
                warn(
                    (
                        f"TeX file {tex_file} is missing! It may be a typo in the document, "
                        "ignore parameters from this file."
                    )
                )
        return include_files

    def parse_version(self, parse=True):
        """
        Parses and sets the SPARC version based on the C-source file, if possible.
        The date for the SPARC code is parsed from initialization.c in the "YYYY.MM.DD"
        format.

        Args:
            parse (bool): Whether to parse the version. Only the literal `False`
                disables parsing.

        Sets:
            self.version (str): The parsed version in 'YYYY.MM.DD' format or None,
                                if either parse=False, or the C-source code is missing

        Raises:
            ValueError: If a version string is found but is not a recognised date.
        """
        if parse is False:
            self.version = None
            return
        # The source tree is expected two levels above the LaTeX root
        init_c = self.root.parents[1] / "src" / "initialization.c"
        if not init_c.is_file():
            warn(
                'Cannot find the c source file "initialization.c", skip version parsing!'
            )
            self.version = None
            return
        text = init_c.read_text(encoding="utf8")
        pattern_version = r"SPARC\s+\(\s*?version(.*?)\)"
        match = re.findall(pattern_version, text)
        if len(match) != 1:
            warn(
                'Parsing c source file "initialization.c" for version is unsuccessful!'
            )
            self.version = None
            return
        # Normalize commas and repeated whitespace in case the source code
        # includes extra spacing
        date_str = re.sub(r"\s+", " ", match[0].strip().replace(",", " "))
        # Older version of SPARC doc may contain abbreviated month format
        date_version = None
        for fmt in ("%b %d %Y", "%B %d %Y"):
            try:
                date_version = datetime.strptime(date_str, fmt).strftime("%Y.%m.%d")
                break
            except ValueError:
                # strptime signals a format mismatch with ValueError; try the next one
                continue
        if date_version is None:
            raise ValueError(f"Cannot parse date time {date_str}")
        self.version = date_version

    def __parse_parameter_from_frame(self, frame):
        """Parse the parameters from a single LaTeX frame

        Args:
            frame (str): a string containing the LaTeX frame (e.g. \\begin{frame} ... \\end{frame})

        Returns:
            dict: a key-value paired dict parsed from the frame, or an empty dict
                  if the frame is not a parameter frame or the parameter is
                  commented out. Some field names include:
                  name: TOL_POISSON
                  type: Double | Integer | String | Character | Double array
                  unit: specified in the doc
        """
        pattern_label = r"\\texttt\{(.*?)\}.*?\\label\{(.*?)\}"
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        match_label = re.findall(pattern_label, frame, re.DOTALL | re.MULTILINE)
        if len(match_label) != 1:
            warn("Provided a non-structured frame for parsing, skip.")
            return {}
        raw_symbol, raw_label = match_label[0]
        symbol = convert_tex_parameter(raw_symbol.strip())
        label = raw_label.strip()
        # Every match contains the (name, content) pair of the blocks
        matches = re.findall(pattern_block, frame, re.DOTALL | re.MULTILINE)
        param_dict = {"symbol": symbol, "label": label}
        # TODO: add more type definition
        for key, content in matches:
            key = key.lower()
            content = content.strip()
            # Do not parse commented-out values
            if key == "type" and content.startswith("%"):
                warn(f"Parameter {symbol} is disabled in the doc, ignore!")
                return {}
            if key == "example":
                content = convert_tex_example(content)
            param_dict[key] = content
        # Sanitize 1: Convert types
        param_dict = sanitize_type(param_dict)
        # Sanitize 2: Convert default values
        param_dict = sanitize_default(param_dict)
        # Sanitize 3: Remove TeX components in description and remark
        param_dict = sanitize_description(param_dict)

        return param_dict

    def __parse_frames_from_text(self, text):
        """Extract the contents of all the Beamer frames in the text

        Arguments:
            text (str): Full LaTeX text
        Returns:
            list: Matched LaTeX Beamer frame fragments
        """
        pattern_frame = r"\\begin\{frame\}(.*?)\\end\{frame\}"
        return re.findall(pattern_frame, text, re.DOTALL | re.MULTILINE)

    def __parse_intro_file(self):
        """Parse the introduction file

        Returns:
            tuple: `(parameter_categories, parameter_dict)` where
                parameter_categories (list): list of lower-cased categories
                    (following order in Introduction.tex)
                parameter_dict (dict): maps each category to a list of
                    `{"label": ..., "symbol": ...}` entries

        Raises:
            ValueError: If a category appears more than once.
            IndexError: If the "Input file options" frame is missing.
        """
        text_intro = self.intro_file.read_text(encoding="utf8")
        pattern_params = (
            r"^\\begin\{frame\}.*?\{Input file options\}.*?$(.*?)\\end\{frame\}"
        )
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        pattern_line = r"\\hyperlink\{(.*?)\}{\\texttt\{(.*?)\}\}"
        text_params = re.findall(pattern_params, text_intro, re.DOTALL | re.MULTILINE)[
            0
        ]
        parameter_categories = []
        parameter_dict = {}
        for block_title, block_body in re.findall(pattern_block, text_params):
            cat = block_title.lower()
            if cat in parameter_categories:
                raise ValueError(
                    f"Key {cat} already exists! You might have a wrong LaTeX doc file!"
                )
            parameter_categories.append(cat)
            parameter_dict[cat] = []
            for line in block_body.split("\n"):
                # Each match contains 2 items: the "link" that matches a reference
                # in the included tex files, and the actual symbol name (in text
                # format). In most cases the link and symbol should be the same.
                for raw_label, raw_symbol in re.findall(pattern_line, line):
                    parameter_dict[cat].append(
                        {
                            "label": raw_label.strip(),
                            "symbol": convert_tex_parameter(raw_symbol.strip()),
                        }
                    )
        return parameter_categories, parameter_dict

    def __parse_all_included_files(self):
        """Collect all known parameters from included files

        Returns:
            dict: All known parameters from included files, keyed by their label
        """
        all_params = {}
        intro_resolved = self.intro_file.resolve()
        for f in self.include_files:
            # Do not parse intro file since it's waste of time
            if f.resolve() == intro_resolved:
                continue
            text = f.read_text(encoding="utf8")
            for frame in self.__parse_frames_from_text(text):
                dic = self.__parse_parameter_from_frame(frame)
                if dic:
                    all_params[dic["label"]] = dic
        return all_params

    def parse_parameters(self):
        """The actual thing for parsing parameters

        Sets:
            parameters (dict): All parsed parameters listed in the intro file
            parameter_categories (list): List of categories
            other_parameters (dict): Any parameters that are not included in the categories
        """
        parameter_categories, parameter_dict = self.__parse_intro_file()
        all_params = self.__parse_all_included_files()
        self.parameter_categories = parameter_categories
        # parameters contain only the "valid" ones that are shown in the intro
        # all others are clustered in "other_parameters"
        self.parameters = {}
        for cat, params in parameter_dict.items():
            for p in params:
                # pop so that whatever remains afterwards is uncategorized
                param_details = all_params.pop(p["label"], {})
                if param_details:
                    param_details["category"] = cat
                    self.parameters[p["symbol"]] = param_details

        self.other_parameters = {
            param_details["symbol"]: param_details
            for param_details in all_params.values()
        }

    def postprocess(self):
        """Use the hardcoded dict postprocess_items to fix some issues"""
        for param, fix in postprocess_items.items():
            if param in self.parameters:
                self.parameters[param].update(**fix)

    def to_dict(self):
        """Output a json dict from current document parser

        Returns:
            dict: All API schemes in dict, with parameters sorted by symbol
        """
        return {
            "sparc_version": self.version,
            "categories": self.parameter_categories,
            "parameters": dict(sorted(self.parameters.items())),
            "other_parameters": dict(sorted(self.other_parameters.items())),
            "data_types": sorted({p["type"] for p in self.parameters.values()}),
        }

    @classmethod
    def json_from_directory(cls, directory=".", include_subdirs=True, **kwargs):
        """
        Recursively add parameters from all Manual files
        Arguments:
            directory (str or PosixPath): The directory to the LaTeX files, e.g. <sparc-root>/doc/.LaTeX
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
            **kwargs: Forwarded to the parser of the root directory only
        Returns:
            str: Formatted json-string of the API
        """
        directory = Path(directory)
        root_dict = cls(directory=directory, **kwargs).to_dict()
        if include_subdirs:
            # Sort and de-duplicate so that the merge result does not depend on
            # the file-system listing order and each sub-directory is parsed once
            subdirs = sorted({tex.parent for tex in directory.glob("*/*Manual.tex")})
            for subdir in subdirs:
                try:
                    sub_dict = cls(directory=subdir, parse_version=False).to_dict()
                except FileNotFoundError:
                    print(
                        subdir,
                        " Latex files not found. Check naming conventions for Manual.tex. Expects format *Manual.tex",
                    )
                    continue
                # Definitions already present in the root manual take precedence
                for param, param_desc in sub_dict["parameters"].items():
                    root_dict["parameters"].setdefault(param, param_desc)
                # Combine the subdir categories
                for sub_category in sub_dict["categories"]:
                    if sub_category not in root_dict["categories"]:
                        root_dict["categories"].append(sub_category)
                # Combine data types
                for sub_dt in sub_dict["data_types"]:
                    if sub_dt not in root_dict["data_types"]:
                        root_dict["data_types"].append(sub_dt)

        json_string = json.dumps(root_dict, indent=True)
        return json_string

    @classmethod
    def json_from_repo(
        cls, url=sparc_repo_url, version="master", include_subdirs=True, **kwargs
    ):
        """
        Download the source code from git and use json_from_directory to parse
        Arguments:
            url (str): URL for the repository of SPARC, default is "https://github.com/SPARC-X/SPARC.git"
            version (str): Git version or commit hash of the SPARC repo
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
        Returns:
            str: Formatted json-string of the API
        Raises:
            subprocess.CalledProcessError: If any git command fails
        """
        import tempfile
        from subprocess import run

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            download_dir = tmpdir / "SPARC"
            download_cmds = ["git", "clone", "--depth", "1", str(url), "SPARC"]
            # check=True: do not silently continue with a missing / wrong checkout
            run(download_cmds, cwd=tmpdir, check=True)
            if version not in ["master", "HEAD"]:
                # `git fetch` takes the repository first and the ref second; the
                # fetched commit is then available as FETCH_HEAD regardless of
                # whether `version` is a branch, a tag or a commit hash
                fetch_cmds = ["git", "fetch", "--depth", "1", "origin", str(version)]
                run(fetch_cmds, cwd=download_dir, check=True)
                checkout_cmds = ["git", "checkout", "FETCH_HEAD"]
                run(checkout_cmds, cwd=download_dir, check=True)
            json_string = cls.json_from_directory(
                directory=download_dir / "doc" / ".LaTeX",
                include_subdirs=include_subdirs,
                **kwargs,
            )
        return json_string
def convert_tex_parameter(text):
    """
    Convert a TeX string to non-escaped name (for parameter only)

    Surrounding whitespace is stripped and TeX-escaped underscores are
    replaced by plain underscores.

    Arguments:
        text (str): Parameter name in LaTeX format
    Returns:
        str: Text with sanitized parameter
    """
    # The backslash is written explicitly: a bare backslash-underscore is an
    # invalid escape sequence and triggers a SyntaxWarning on recent Pythons
    return text.strip().replace("\\_", "_")
def convert_tex_example(text):
    """Convert TeX codes of examples as much as possible
    The examples follow the format
    SYMBOL: values (may contain new lines)

    Only the first colon separates the symbol from its values, so values
    that contain a colon themselves are preserved.

    Arguments:
        text (str): Single or multiline LaTeX contents
    Returns:
        str: Sanitized literal text
    Raises:
        ValueError: If the text does not contain a colon
    """
    # Order matters: the single-backslash rule must run last, after the
    # texttt and escaped-underscore rules have consumed their backslashes
    replacements = (
        ("\\texttt{", ""),
        ("\\_", "_"),
        ("}", ""),
        ("\\", "\n"),
    )
    new_text = text
    for old, new in replacements:
        new_text = new_text.replace(old, new)

    parts = new_text.split(":", 1)
    if len(parts) != 2:
        raise ValueError(f"Example {text!r} does not follow the 'SYMBOL: values' format")
    symbol, values = parts
    symbol = symbol.strip()
    values = re.sub("\n+", "\n", values.strip())
    # Remove all comment lines
    values = "\n".join(
        line for line in values.splitlines() if not line.lstrip().startswith("%")
    )
    return f"{symbol}: {values}"
def convert_tex_default(text, desired_type=None):
    """Convert default values as much as possible.
    The desired type will convert the default values
    to the closest format

    Currently supported conversions
    1. Remove all surrounding text modifiers (texttt)
    2. Remove all symbol wrappers $
    3. Convert value to single or array

    Arguments:
        text (str): Raw text string for value
        desired_type (str or None): Data type to be converted to. If None, preserve the string format

    Returns:
        converted: Value converted from raw text. None when the text contains
                   "none" or "no default", "auto" when it contains "automat"
                   (all case-insensitive), otherwise the cleaned text (for
                   string / None types) or the result of `text2value`.
    """
    # Applied in order; every backslash is spelled out explicitly to avoid
    # invalid escape sequences
    replacements = (
        ("\\texttt{", ""),
        ("}", ""),
        ("{", ""),
        ("\\_", "_"),
        ("\\\\", "\n"),
        ("$", ""),
    )
    text = text.strip()
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for old, new in replacements:
        text = text.replace(old, new)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    text = "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )

    lowered = text.lower()
    if "none" in lowered or "no default" in lowered:
        return None
    if "automat" in lowered:
        return "auto"
    # try type conversion
    if desired_type is None or desired_type == "string":
        return text
    return text2value(text, desired_type)
def convert_comment(text):
    """Used to remove TeX-specific commands in description and remarks
    as much as possible

    Hyperlink / href targets, texttt wrappers, braces and dollar signs are
    dropped, escaped underscores are unescaped, TeX line breaks become
    newlines and lines starting with a percent sign are removed.

    Arguments:
        text (str): Raw LaTeX code for the comment section in manual

    Returns:
        str: Sanitized plain text
    """
    # Applied in order; every backslash is spelled out explicitly to avoid
    # invalid escape sequences
    replacements = (
        ("\\texttt{", ""),
        ("}", ""),
        ("{", ""),
        ("\\_", "_"),
        ("\\\\", "\n"),
        ("$", ""),
    )
    text = text.strip()
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\href\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for old, new in replacements:
        text = text.replace(old, new)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    return "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )
def text2value(text, desired_type):
    """Convert raw text to a desired type

    The text is first parsed as a space-delimited numerical table; scalar
    types take its first entry, array types convert the whole table.

    Arguments:
        text (str): Text contents for the value
        desired_type (str): Target data type from 'string', 'integer',
                            'integer array', 'double', 'double array',
                            'bool', 'bool array'
    Returns:
        converted: Value converted to the desired type, or None if the text
                   cannot be parsed / converted or the type is unknown
    """
    from contextlib import suppress

    if desired_type is None:
        return text
    target = desired_type.lower()
    if target == "string":
        return text.strip()

    parsed = None
    try:
        candidate = np.genfromtxt(text.splitlines(), delimiter=" ", dtype=float)
        if np.isnan(candidate).any():
            warn(
                f"Some fields in {text} cannot converted to a numerical array, will skip conversion."
            )
        else:
            parsed = candidate
    except Exception as e:
        warn(
            f"Cannot transform {text} to array, skip converting. Error message is:\n {e}"
        )
        parsed = None

    if parsed is None:
        return None

    # Upshape ndarray to at least 1D
    if parsed.shape == ():
        parsed = np.reshape(parsed, [1])

    scalar_casts = {"integer": int, "bool": bool, "double": float}
    array_casts = {"integer array": int, "bool array": bool, "double array": float}

    converted = None
    # Ignore all failures and make conversion None
    with suppress(Exception):
        if target in scalar_casts:
            converted = scalar_casts[target](parsed[0])
        elif target in array_casts:
            converted = parsed.astype(array_casts[target]).tolist()
    return converted
def is_array(text):
    """Simply try to convert a string into a numpy array and compare if length is larger than 1
    it is only used to compare a float / int value

    Arguments:
        text (str): Space-separated numerical values
    Returns:
        bool: True only if more than one value could be parsed. Text without
              any parsable value (e.g. an empty string) is not an array.
    """
    val = np.fromstring(text, sep=" ")
    # Strictly "more than one": zero parsed values must not count as an array
    return len(val) > 1
def contain_only_bool(text):
    """Tell whether every number found in a string is either 0 or 1

    Text containing a decimal point, a sign or an exponent marker is rejected
    outright; text without any digit is accepted.
    """
    disqualifying = (".", "+", "-", "e", "E")
    if any(marker in text for marker in disqualifying):
        return False
    tokens = re.findall(r"[-+e\d]+", text, re.DOTALL)
    return all(int(token) in (0, 1) for token in tokens)
def sanitize_description(param_dict):
    """Make the description and remark of a parameter entry human-readable

    The untouched LaTeX sources are kept under `description_raw` and
    `remark_raw`; a missing remark is treated as an empty string.

    Arguments:
        param_dict (dict): Raw dict for one parameter entry

    Returns:
        dict: Copy of the parameter dict with `description` and `remark`
              passed through `convert_comment`
    """
    raw_description = param_dict["description"]
    raw_remark = param_dict.get("remark", "")

    result = dict(param_dict)
    result.update(
        description_raw=raw_description,
        remark_raw=raw_remark,
        description=convert_comment(raw_description),
        remark=convert_comment(raw_remark),
    )
    return result
def sanitize_default(param_dict):
    """Convert the default field of a parameter entry

    1. Keep the original default text in an extra field `default_remark`
    2. Replace `default` by the output of `convert_tex_default`, using the
       entry's `type` as the desired type

    This function should be called after sanitize_type

    Arguments:
        param_dict (dict): Parameter entry containing `default` and `type`

    Returns:
        dict: Copy of the parameter dict with the converted default
    """
    raw_default = param_dict["default"]

    result = dict(param_dict)
    result.update(
        default_remark=raw_default,
        default=convert_tex_default(raw_default, param_dict["type"]),
    )
    return result
def sanitize_type(param_dict):
    """Sanitize the param dict so that the type are more consistent

    For example, if type is Double / Integer,
    but parameter is a vector,
    make a double vector or integer vector

    Arguments:
        param_dict (dict): Raw parameter entry; must contain `symbol`

    Returns:
        dict: Copy of the entry with a normalized `type` and an
              `allow_bool_input` flag. If the entry has no `type`, the copy is
              returned unchanged.
    """
    sanitized_dict = param_dict.copy()
    symbol = param_dict["symbol"]
    origin_type = param_dict.get("type", None)
    if origin_type is None:
        print("Dict does not have type!")
        return sanitized_dict
    origin_type = origin_type.lower()

    sanitized_type = None
    sanitized_dict["allow_bool_input"] = False
    # First pass, remove all singular types
    if origin_type == "0 or 1":
        # Resolved by the second pass below
        origin_type = "integer"
    elif "permutation" in origin_type:
        sanitized_type = "integer"
    elif origin_type in ("string", "character"):
        sanitized_type = "string"
    elif "array" in origin_type:
        sanitized_type = origin_type

    # Pass 2, test if int values are arrays
    if (origin_type in ("int", "integer", "double")) and (sanitized_type is None):
        if "int" in origin_type:
            origin_type = "integer"
        # Defaults for the failure path: without them a failed probe would hit
        # unbound names in the handler and in pass 3
        example_value = None
        _array_test = False
        _bool_test = False
        # Test if the value from example is a single value or array
        try:
            example_value = param_dict["example"].split(":")[1]
            default = param_dict["default"]
            _array_test = is_array(example_value)
            _bool_test = contain_only_bool(example_value) and contain_only_bool(default)
        except Exception as e:
            warn(
                f"Array conversion failed for {example_value}, ignore."
                f"The error is {e}"
            )
            _array_test = False  # Retain the scalar type
            _bool_test = False

        if _array_test is True:
            sanitized_type = f"{origin_type} array"
        else:
            sanitized_type = origin_type

        # Pass 3: int to boolean test. This should be done very tight
        if _bool_test and ("integer" in sanitized_type):
            sanitized_dict["allow_bool_input"] = True

    if sanitized_type is None:
        # Currently there is only one NPT_NH_QMASS has this type
        # TODO: think of a way to format a mixed array?
        warn(f"Type of {symbol} if not standard digit or array, mark as others.")
        sanitized_type = "other"
    # TODO: how about provide a true / false type?
    sanitized_dict["type"] = sanitized_type
    return sanitized_dict
if __name__ == "__main__":
    # Run the module as independent script to extract a json-formatted parameter list
    from argparse import ArgumentParser

    argp = ArgumentParser(description="Parse the LaTeX doc to json")
    argp.add_argument(
        "-o",
        "--output",
        default="parameters.json",
        help="Output file name (json-formatted)",
    )
    argp.add_argument(
        "--include-subdirs",
        action="store_true",
        help="Parse manual parameters from subdirs",
    )
    argp.add_argument("--git", action="store_true")
    argp.add_argument(
        "--version",
        default="master",
        help="Version of the doc. Only works when using git repo",
    )
    argp.add_argument(
        "root",
        nargs="?",
        help=(
            "Root of the SPARC doc LaTeX files, or remote git repo link. If not provided and --git is enables, use the default github repo"
        ),
    )

    args = argp.parse_args()
    # A local directory is mandatory unless the doc is fetched from git;
    # report it as a usage error instead of failing inside Path(None)
    if not args.git and args.root is None:
        argp.error("the root directory is required unless --git is given")

    # The output always carries a .json suffix, whatever the user typed
    output = Path(args.output).with_suffix(".json")
    if args.git:
        root = sparc_repo_url if args.root is None else args.root
        json_string = SparcDocParser.json_from_repo(
            url=root, version=args.version, include_subdirs=args.include_subdirs
        )
    else:
        json_string = SparcDocParser.json_from_directory(
            directory=Path(args.root), include_subdirs=args.include_subdirs
        )
    with open(output, "w", encoding="utf8") as fd:
        fd.write(json_string)
    print(f"SPARC parameter specifications written to {output}!")
    print("If you need to fintune the definitions, please edit them manually.")