sudoping01 committed on
Commit
9b9a245
·
verified ·
1 Parent(s): e6f7ac5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -95
app.py CHANGED
@@ -1,155 +1,301 @@
1
  #!/usr/bin/env python3
2
  """
3
  Hugging Face Space Entry Point
4
- This script clones the GitHub repository and runs the actual app from it.
5
- This way, git operations in the app target the GitHub repo, not the HF Space.
6
- """
7
 
 
 
 
 
 
8
 
 
 
 
 
9
 
10
  import os
11
  import subprocess
12
  import sys
 
13
  from pathlib import Path
14
 
15
- def run_command(cmd, check=True, cwd=None):
16
- """Run a command and handle errors"""
17
- print(f"Running: {' '.join(cmd) if isinstance(cmd, list) else cmd}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
  result = subprocess.run(
20
  cmd if isinstance(cmd, list) else cmd.split(),
21
  check=check,
22
- capture_output=True,
23
  text=True,
24
  cwd=cwd
25
  )
26
- if result.stdout:
27
- print(result.stdout)
28
- if result.stderr:
29
- print(result.stderr, file=sys.stderr)
 
 
 
30
  return result
 
31
  except subprocess.CalledProcessError as e:
32
- print(f"Error: {e}")
33
  if e.stdout:
34
- print(f"stdout: {e.stdout}")
35
  if e.stderr:
36
- print(f"stderr: {e.stderr}")
37
  raise
38
 
39
- def setup_and_run():
40
- """Clone GitHub repo and run the app"""
41
-
42
- # Configuration
43
- GITHUB_USER = os.getenv('GITHUB_USER', 'sudoping01')
44
- GITHUB_REPO = os.getenv('GITHUB_REPO', 'MALIBA-AI/bambara-asr-leaderboard')
45
- GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')
46
- GITHUB_EMAIL = os.getenv('GITHUB_EMAIL', '[email protected]')
47
-
48
- # Ensure HF_TOKEN is set (even if empty string) to avoid Pydantic validation error
49
- if 'HF_TOKEN' not in os.environ:
50
- os.environ['HF_TOKEN'] = ''
51
-
52
- # Directory structure
53
- repo_dir = Path("/tmp/github_repo")
54
- app_dir = repo_dir / "space"
55
-
56
- print("=" * 50)
57
- print("Hugging Face Space - GitHub Integration Setup")
58
- print("=" * 50)
59
 
60
- # Clone or update repository
 
 
 
 
 
 
 
 
 
61
  if not repo_dir.exists():
62
- print(f"\nπŸ“¦ Cloning repository: {GITHUB_REPO}")
63
 
64
- if GITHUB_TOKEN:
65
- # Use token for authentication
66
- repo_url = f"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_REPO}.git"
67
  else:
68
- # Public repo or using other auth methods
69
- repo_url = f"https://github.com/{GITHUB_REPO}.git"
70
 
 
71
  run_command(["git", "clone", repo_url, str(repo_dir)])
72
 
73
- # Configure git in the cloned repo
74
- run_command(["git", "config", "user.email", GITHUB_EMAIL], cwd=repo_dir)
75
- run_command(["git", "config", "user.name", GITHUB_USER], cwd=repo_dir)
76
 
77
- # Set up remote URL with token if available
78
- if GITHUB_TOKEN:
79
- remote_url = f"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_REPO}.git"
80
  run_command(["git", "remote", "set-url", "origin", remote_url], cwd=repo_dir)
 
 
 
81
  else:
82
  print(f"\nπŸ”„ Repository exists, pulling latest changes...")
83
- run_command(["git", "pull"], cwd=repo_dir, check=False)
 
 
 
 
 
84
 
85
- # Verify the space directory exists
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  if not app_dir.exists():
87
  print(f"\n❌ Error: 'space' directory not found in {repo_dir}")
88
  print("Available directories:")
89
  for item in repo_dir.iterdir():
90
  print(f" - {item.name}")
 
91
  sys.exit(1)
92
 
93
- print(f"\nβœ… App directory found: {app_dir}")
94
 
95
- # Change to app directory
96
  os.chdir(app_dir)
97
- print(f"Working directory: {os.getcwd()}")
98
 
99
- # Install requirements if they exist
100
- requirements_file = app_dir / "requirements.txt"
101
- if requirements_file.exists():
102
- print("\nπŸ“š Installing requirements...")
103
- run_command([sys.executable, "-m", "pip", "install", "-r", str(requirements_file)])
 
104
 
105
- # IMPORTANT: Update settings to point to the GitHub repo directory
106
- # This ensures all file operations (like leaderboard.csv) happen in the GitHub repo
107
- os.environ['LEADERBOARD_FILE'] = str(repo_dir / "leaderboard.csv")
 
 
 
108
 
109
- # Run the actual app
110
  app_file = app_dir / "app.py"
111
- if app_file.exists():
112
- print("\nπŸš€ Starting application...")
113
- print("=" * 50)
114
-
115
- # Import and run the app
116
- sys.path.insert(0, str(app_dir))
117
-
118
- # Import the necessary modules to patch git operations
119
- import subprocess as orig_subprocess
120
-
121
- # Monkey-patch subprocess to run git commands in the repo directory
122
- original_run = orig_subprocess.run
123
-
124
- def patched_run(cmd, *args, **kwargs):
125
- # If it's a git command and no cwd is specified, use repo_dir
126
- if isinstance(cmd, list) and len(cmd) > 0 and cmd[0] == 'git':
127
- if 'cwd' not in kwargs:
128
- kwargs['cwd'] = str(repo_dir)
129
- print(f"[Git patch] Running git command in: {kwargs['cwd']}")
130
- elif isinstance(cmd, str) and cmd.startswith('git'):
131
- if 'cwd' not in kwargs:
132
- kwargs['cwd'] = str(repo_dir)
133
- print(f"[Git patch] Running git command in: {kwargs['cwd']}")
134
- return original_run(cmd, *args, **kwargs)
135
-
136
- # Apply the patch
137
- orig_subprocess.run = patched_run
138
-
139
- # Execute the app
140
  with open(app_file) as f:
141
  exec(f.read(), {'__name__': '__main__'})
142
- else:
143
- print(f"\n❌ Error: app.py not found in {app_dir}")
 
 
144
  sys.exit(1)
145
 
146
- if __name__ == "__main__":
 
 
147
  try:
148
  setup_and_run()
149
  except KeyboardInterrupt:
150
  print("\n\nπŸ‘‹ Application stopped by user")
 
151
  except Exception as e:
152
- print(f"\n❌ Error: {e}")
153
  import traceback
154
  traceback.print_exc()
155
- sys.exit(1)
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
  Hugging Face Space Entry Point
4
+ ================================
5
+ This script clones a GitHub repository and runs the application from it.
6
+ This architecture ensures git operations target the GitHub repo, not the HF Space.
7
 
8
+ Architecture:
9
+ - HF Space contains only this entry point
10
+ - Actual application code lives in GitHub repository
11
+ - Git operations are patched to target the cloned GitHub repo
12
+ - File operations (like leaderboard.csv) happen in the GitHub repo directory
13
 
14
+ Security:
15
+ - All sensitive information is masked in logs
16
+ - Tokens are never printed or exposed
17
+ """
18
 
19
  import os
20
  import subprocess
21
  import sys
22
+ import re
23
  from pathlib import Path
24
 
25
+
26
# Compiled once at import time. Order matters: bare tokens are collapsed first,
# then credentials embedded in URLs.
_SENSITIVE_PATTERNS = (
    (re.compile(r'ghp_[a-zA-Z0-9]{36,}'), 'ghp_***'),                # GitHub Personal Access Token
    (re.compile(r'github_pat_[a-zA-Z0-9_]{82,}'), 'github_pat_***'),  # GitHub PAT (new format)
    (re.compile(r'hf_[a-zA-Z0-9]{20,}'), 'hf_***'),                   # Hugging Face token
    # user:password@ inside a URL. Whitespace and '/' are excluded so the match
    # cannot run past the authority part into unrelated text; ':' is allowed in
    # the password part.
    (re.compile(r'://[^\s/:@]+:[^\s/@]+@'), '://***:***@'),
    # token-only userinfo, e.g. https://<token>@github.com/...
    (re.compile(r'://[^\s/:@]+@'), '://***@'),
)


def mask_sensitive_info(text):
    """
    Mask sensitive information in text for safe logging.

    Args:
        text: Value that may contain sensitive information. ``None`` is
            returned unchanged; any other non-string value (for example an
            exception) is converted with ``str()`` first.

    Returns:
        String with known token formats and URL credentials replaced by
        placeholders, or ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return text

    masked = text if isinstance(text, str) else str(text)
    for pattern, replacement in _SENSITIVE_PATTERNS:
        masked = pattern.sub(replacement, masked)

    return masked
52
+
53
+
54
def run_command(cmd, check=True, cwd=None, capture_output=True):
    """
    Execute a command (without a shell) with error handling and secure logging.

    Args:
        cmd: Command to execute. A string is tokenised with ``shlex.split`` so
            quoted arguments stay intact; any other sequence is used as the
            argument vector as-is.
        check: Whether to raise exception on non-zero exit
        cwd: Working directory for the command
        capture_output: Whether to capture stdout/stderr

    Returns:
        subprocess.CompletedProcess object

    Raises:
        subprocess.CalledProcessError: When ``check`` is true and the command
            exits non-zero. The raised exception carries masked copies of the
            command and its output, so it is safe for callers to print.
    """
    import shlex  # local import: only needed here

    if isinstance(cmd, str):
        argv = shlex.split(cmd)
        cmd_display = cmd
    else:
        argv = list(cmd)
        cmd_display = ' '.join(str(part) for part in argv)

    print(f"Running: {mask_sensitive_info(cmd_display)}")

    try:
        result = subprocess.run(
            argv,
            check=check,
            capture_output=capture_output,
            text=True,
            cwd=cwd,
        )
    except subprocess.CalledProcessError as e:
        safe_stdout = mask_sensitive_info(e.stdout) if e.stdout else e.stdout
        safe_stderr = mask_sensitive_info(e.stderr) if e.stderr else e.stderr

        print(f"❌ Command failed with exit code {e.returncode}")
        if safe_stdout:
            print(f"stdout: {safe_stdout}")
        if safe_stderr:
            print(f"stderr: {safe_stderr}")

        # str(CalledProcessError) embeds the command line, which may contain a
        # credential-bearing URL. Re-raise the same exception type with masked
        # fields; "from None" keeps the unmasked original out of tracebacks.
        safe_cmd = [mask_sensitive_info(str(part)) for part in argv]
        raise subprocess.CalledProcessError(
            e.returncode, safe_cmd, safe_stdout, safe_stderr
        ) from None

    # Print output with sensitive info masked (both are None when not captured)
    if result.stdout and capture_output:
        print(mask_sensitive_info(result.stdout))
    if result.stderr and capture_output:
        print(mask_sensitive_info(result.stderr), file=sys.stderr)

    return result
95
 
96
+
97
def setup_github_repo(repo_dir, github_user, github_repo, github_token, github_email):
    """
    Clone or update the GitHub repository.

    If ``repo_dir`` does not exist the repository is cloned and the git
    identity is configured in it. Otherwise ``git pull`` is attempted and a
    failure is tolerated (the existing checkout is kept).

    Args:
        repo_dir: Path where repo should be cloned
        github_user: GitHub username
        github_repo: Repository in format 'owner/repo'
        github_token: GitHub authentication token (optional; ``None``, empty or
            whitespace-only means "no token")
        github_email: Email for git config

    Returns:
        True. Failures of the clone/config commands are reported by the
        exception that ``run_command`` raises, not by a False return.
    """
    from urllib.parse import quote  # local import: only needed here

    # Secrets often arrive with a trailing newline; embed the stripped value.
    token = (github_token or "").strip()

    if not repo_dir.exists():
        print(f"\n📦 Cloning repository: {github_repo}")

        if token:
            # Percent-encode the userinfo so reserved characters cannot break the URL.
            origin_url = (
                f"https://{quote(github_user, safe='')}:{quote(token, safe='')}"
                f"@github.com/{github_repo}.git"
            )
        else:
            # Public repo fallback
            origin_url = f"https://github.com/{github_repo}.git"

        # The clone records origin_url as the "origin" remote, so later pushes
        # already carry the token; no separate "remote set-url" is needed.
        run_command(["git", "clone", origin_url, str(repo_dir)])

        # Configure git user
        run_command(["git", "config", "user.email", github_email], cwd=repo_dir)
        run_command(["git", "config", "user.name", github_user], cwd=repo_dir)

        print("✅ Repository cloned successfully")
        return True

    print("\n🔄 Repository exists, pulling latest changes...")
    pull = run_command(["git", "pull"], cwd=repo_dir, check=False)

    if pull.returncode == 0:
        print("✅ Repository updated successfully")
    else:
        print("⚠️ Git pull failed, continuing with existing version")

    return True
145
+
146
+
147
def setup_environment(app_dir, repo_dir):
    """
    Set up the Python environment and install dependencies.

    Installs ``requirements.txt`` from ``app_dir`` when present, exports the
    ``LEADERBOARD_FILE`` environment variable, and puts ``app_dir`` at the
    front of ``sys.path``.

    Args:
        app_dir: Path to the application directory
        repo_dir: Path to the repository root
    """
    requirements_file = app_dir / "requirements.txt"
    if not requirements_file.exists():
        print("⚠️ No requirements.txt found, skipping dependency installation")
    else:
        print("\n📚 Installing requirements...")
        # Use the running interpreter's pip so packages land in this environment.
        run_command([sys.executable, "-m", "pip", "install", "-r", str(requirements_file)])
        print("✅ Requirements installed successfully")

    # Point the leaderboard file at the repository root so it is created in
    # the cloned GitHub repo rather than next to the app code.
    os.environ['LEADERBOARD_FILE'] = str(repo_dir / "leaderboard.csv")

    # Add app directory to Python path
    sys.path.insert(0, str(app_dir))
170
+
171
+
172
def patch_git_operations(repo_dir):
    """
    Monkey-patch subprocess.run to ensure git commands target the GitHub repo.

    After this call, any ``subprocess.run`` invocation whose program is
    exactly ``git`` and that does not specify a working directory is executed
    with ``cwd`` set to ``repo_dir``. All other calls pass through unchanged.

    Args:
        repo_dir: Path to the GitHub repository
    """
    import functools
    import subprocess as orig_subprocess

    original_run = orig_subprocess.run
    target_cwd = str(repo_dir)

    def _is_git_command(cmd):
        """Return True when the program being run is exactly ``git``."""
        if isinstance(cmd, str):
            # Compare the first word only, so "gitk" or "github-cli" don't match.
            words = cmd.split(None, 1)
            return bool(words) and words[0] == 'git'
        if isinstance(cmd, (list, tuple)):
            return len(cmd) > 0 and cmd[0] == 'git'
        return False

    @functools.wraps(original_run)
    def patched_run(*popenargs, **kwargs):
        """Patched subprocess.run that redirects git commands to repo_dir"""
        # The command may be given positionally or as the ``args=`` keyword.
        cmd = popenargs[0] if popenargs else kwargs.get('args')

        # cwd=None means "inherit", i.e. the caller did not pick a directory.
        if _is_git_command(cmd) and kwargs.get('cwd') is None:
            kwargs['cwd'] = target_cwd
            print(f"[Git Patch] Redirecting git command to: {mask_sensitive_info(target_cwd)}")

        return original_run(*popenargs, **kwargs)

    # Apply the patch
    orig_subprocess.run = patched_run
    print("✅ Git operations patched successfully")
205
+
206
+
207
def setup_and_run():
    """
    Main setup function: clone repo, configure environment, and run the app.

    Reads GITHUB_USER, GITHUB_REPO, GITHUB_TOKEN and GITHUB_EMAIL from the
    environment, prepares the checkout under /tmp/github_repo, then executes
    ``space/app.py`` from it as ``__main__``. Any setup failure terminates the
    process with exit status 1. Every error message is passed through
    ``mask_sensitive_info`` before being printed.
    """
    import traceback

    # Load configuration from environment variables
    GITHUB_USER = os.getenv('GITHUB_USER', 'sudoping01')
    GITHUB_REPO = os.getenv('GITHUB_REPO', 'MALIBA-AI/bambara-asr-leaderboard')
    GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')
    # NOTE(review): this default looks like a redacted placeholder rather than
    # a real address - confirm the intended value.
    GITHUB_EMAIL = os.getenv('GITHUB_EMAIL', '[email protected]')

    # Define directory structure
    repo_dir = Path("/tmp/github_repo")
    app_dir = repo_dir / "space"

    # Print startup banner
    banner_rule = "=" * 60
    print(banner_rule)
    print(" Hugging Face Space - GitHub Integration Setup")
    print(banner_rule)
    print(f"Repository: {GITHUB_REPO}")
    print(f"Target directory: {repo_dir}")
    print(banner_rule)

    # Step 1: Setup GitHub repository
    try:
        setup_github_repo(repo_dir, GITHUB_USER, GITHUB_REPO, GITHUB_TOKEN, GITHUB_EMAIL)
    except Exception as e:
        # A failed git command's message embeds the command line, which can
        # include the token-bearing clone URL - never print it unmasked.
        print(f"\n❌ Failed to setup GitHub repository: {mask_sensitive_info(str(e))}")
        sys.exit(1)

    # Step 2: Verify app directory exists
    if not app_dir.exists():
        print(f"\n❌ Error: 'space' directory not found in {repo_dir}")
        print("Available directories:")
        for item in repo_dir.iterdir():
            print(f" - {item.name}")
        print("\nPlease ensure your repository has a 'space' directory with the application code.")
        sys.exit(1)

    print(f"✅ Application directory found: {app_dir}")

    # Step 3: Change to app directory
    os.chdir(app_dir)
    print(f"✅ Working directory: {os.getcwd()}")

    # Step 4: Setup environment
    try:
        setup_environment(app_dir, repo_dir)
    except Exception as e:
        print(f"\n❌ Failed to setup environment: {mask_sensitive_info(str(e))}")
        sys.exit(1)

    # Step 5: Patch git operations (best effort: the app can still start without it)
    try:
        patch_git_operations(repo_dir)
    except Exception as e:
        print(f"\n⚠️ Failed to patch git operations: {mask_sensitive_info(str(e))}")
        print("Continuing anyway...")

    # Step 6: Run the application
    app_file = app_dir / "app.py"
    if not app_file.exists():
        print(f"\n❌ Error: app.py not found in {app_dir}")
        sys.exit(1)

    print("\n" + banner_rule)
    print(" 🚀 Starting Application")
    print(banner_rule + "\n")

    try:
        # Python source is UTF-8 by default; don't depend on the locale encoding.
        with open(app_file, encoding="utf-8") as f:
            app_source = f.read()
        # Compiling with the real filename gives usable tracebacks, and
        # __file__ lets the app locate resources relative to itself.
        app_code = compile(app_source, str(app_file), "exec")
        exec(app_code, {'__name__': '__main__', '__file__': str(app_file)})
    except Exception as e:
        print(f"\n❌ Application error: {mask_sensitive_info(str(e))}")
        sys.stderr.write(mask_sensitive_info(traceback.format_exc()))
        sys.exit(1)
284
 
285
+
286
def main():
    """
    Entry point with proper error handling.

    Runs ``setup_and_run`` and maps its outcome to a process exit status:
    0 when interrupted by the user, 1 on any other unhandled exception.
    The error message and traceback are masked before being written.
    """
    try:
        setup_and_run()
    except KeyboardInterrupt:
        print("\n\n👋 Application stopped by user")
        sys.exit(0)
    except Exception as e:
        import traceback

        # Exception text can embed command lines or URLs that carry tokens.
        print(f"\n❌ Fatal error: {mask_sensitive_info(str(e))}")
        sys.stderr.write(mask_sensitive_info(traceback.format_exc()))
        sys.exit(1)


if __name__ == "__main__":
    main()