Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

MilesCranmer committed on Sep 17, 2023

Commit

09a7186

1 Parent(s): 5620b3a

Refactor utility functions

Browse files

Files changed (2) hide show

pysr/sr.py +8 -52
pysr/utils.py +55 -0

pysr/sr.py CHANGED Viewed

@@ -33,6 +33,12 @@ from .julia_helpers import (
     init_julia,
     is_julia_version_greater_eq,
 )
 Main = None  # TODO: Rename to more descriptive name like "julia_runtime"
@@ -945,10 +951,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         model : PySRRegressor
             The model with fitted equations.
         """
-        if os.path.splitext(equation_file)[1] != ".pkl":
-            pkl_filename = _csv_filename_to_pkl_filename(equation_file)
-        else:
-            pkl_filename = equation_file
         # Try to load model from <equation_file>.pkl
         print(f"Checking if {pkl_filename} exists...")
@@ -2437,51 +2441,3 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
     return selector.get_support(indices=True)
-def _csv_filename_to_pkl_filename(csv_filename) -> str:
-    # Assume that the csv filename is of the form "foo.csv"
-    assert str(csv_filename).endswith(".csv")
-    dirname = str(os.path.dirname(csv_filename))
-    basename = str(os.path.basename(csv_filename))
-    base = str(os.path.splitext(basename)[0])
-    pkl_basename = base + ".pkl"
-    return os.path.join(dirname, pkl_basename)
-_regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
-_regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
-_regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
-_apply_regexp_im = lambda x: _regexp_im.sub(r"\1j", x)
-_apply_regexp_im_sci = lambda x: _regexp_im_sci.sub(r"\1e\2j", x)
-_apply_regexp_sci = lambda x: _regexp_sci.sub(r"\1e\2", x)
-def _preprocess_julia_floats(s: str) -> str:
-    if isinstance(s, str):
-        s = _apply_regexp_im(s)
-        s = _apply_regexp_im_sci(s)
-        s = _apply_regexp_sci(s)
-    return s
-def _subscriptify(i: int) -> str:
-    """Converts integer to subscript text form.
-    For example, 123 -> "₁₂₃".
-    """
-    return "".join([chr(0x2080 + int(c)) for c in str(i)])
-def _safe_check_feature_names_in(self, variable_names, generate_names=True):
-    """_check_feature_names_in with compat for old versions."""
-    try:
-        return _check_feature_names_in(
-            self, variable_names, generate_names=generate_names
-        )
-    except TypeError:
-        return _check_feature_names_in(self, variable_names)

     init_julia,
     is_julia_version_greater_eq,
 )
+from .utils import (
+    _csv_filename_to_pkl_filename,
+    _preprocess_julia_floats,
+    _safe_check_feature_names_in,
+    _subscriptify,
+)
 Main = None  # TODO: Rename to more descriptive name like "julia_runtime"
         model : PySRRegressor
             The model with fitted equations.
         """
+        pkl_filename = _csv_filename_to_pkl_filename(equation_file)
         # Try to load model from <equation_file>.pkl
         print(f"Checking if {pkl_filename} exists...")
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
     return selector.get_support(indices=True)

pysr/utils.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import re
+from sklearn.utils.validation import _check_feature_names_in
+def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
+    """Return the pickle filename that corresponds to an equation file.
+
+    A path whose extension is already ".pkl" is returned unchanged (as a
+    string). Otherwise the path must end in ".csv", and that extension is
+    replaced by ".pkl" while the directory part is kept.
+
+    Parameters
+    ----------
+    csv_filename : str or path-like
+        Path ending in ".csv" or ".pkl".
+
+    Returns
+    -------
+    str
+        Path of the ".pkl" file. Always a `str`, even for path-like input.
+
+    Raises
+    ------
+    AssertionError
+        If the path ends in neither ".pkl" nor ".csv".
+    """
+    # Coerce once so both branches return `str`, matching the annotation.
+    filename = str(csv_filename)
+    if os.path.splitext(filename)[1] == ".pkl":
+        return filename
+    # Assume that the csv filename is of the form "foo.csv".
+    # NOTE: kept as an `assert` so the exception type stays AssertionError;
+    # be aware that the check is skipped when Python runs with `-O`.
+    assert filename.endswith(".csv")
+    dirname, basename = os.path.split(filename)
+    stem = os.path.splitext(basename)[0]
+    return os.path.join(dirname, stem + ".pkl")
+# Decimal literal with an "im" suffix, e.g. "1.5im".
+_regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
+# Decimal literal with an e/E/f/F exponent and an "im" suffix, e.g. "1.5e-3im".
+_regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
+# Decimal literal with an e/E/f/F exponent, e.g. "1.5f0".
+_regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
+def _apply_regexp_im(x):
+    """Rewrite "<float>im" as "<float>j"."""
+    return _regexp_im.sub(r"\1j", x)
+def _apply_regexp_im_sci(x):
+    """Rewrite "<float>[eEfF]<exp>im" as "<float>e<exp>j"."""
+    return _regexp_im_sci.sub(r"\1e\2j", x)
+def _apply_regexp_sci(x):
+    """Rewrite "<float>[eEfF]<exp>" as "<float>e<exp>"."""
+    return _regexp_sci.sub(r"\1e\2", x)
+def _preprocess_julia_floats(s: str) -> str:
+    """Rewrite Julia-style float literals inside `s` using Python syntax.
+
+    The "im" suffix becomes "j" and the exponent markers e/E/f/F are
+    normalized to "e". Only literals that contain a decimal point are
+    touched. Anything that is not a `str` is returned unchanged.
+
+    Parameters
+    ----------
+    s : str
+        Text that may contain such literals.
+
+    Returns
+    -------
+    str
+        The rewritten text, or `s` itself when it is not a string.
+    """
+    if isinstance(s, str):
+        s = _apply_regexp_im(s)
+        s = _apply_regexp_im_sci(s)
+        s = _apply_regexp_sci(s)
+    return s
+def _safe_check_feature_names_in(self, variable_names, generate_names=True):
+    """Call `_check_feature_names_in` with a fallback for old versions.
+
+    The call is first made with the `generate_names` keyword. If that
+    raises `TypeError`, it is repeated with only `self` and
+    `variable_names`, and that result is returned instead.
+    """
+    try:
+        feature_names = _check_feature_names_in(
+            self, variable_names, generate_names=generate_names
+        )
+    except TypeError:
+        # Presumably an older helper that lacks `generate_names` -- confirm.
+        feature_names = _check_feature_names_in(self, variable_names)
+    return feature_names
+def _subscriptify(i: int) -> str:
+    """Render an integer using Unicode subscript digits.
+
+    Each character of `str(i)` is mapped to the code point at
+    0x2080 plus its digit value, so 123 becomes "₁₂₃".
+    """
+    subscript_zero = 0x2080
+    glyphs = (chr(subscript_zero + int(digit)) for digit in str(i))
+    return "".join(glyphs)