Skip to content

Commit 0fc5914

Browse files
jvanhoye and claude committed
Make artist processing abortable and resumable
On Spotify rate limit (>5 min wait) or Ctrl+C, the artist loop now exits cleanly via a stop_early flag instead of sys.exit(1), then falls through to the existing playlist-save code — so tracks already found in the interrupted run are saved to Spotify before exiting.

Resumability is backed by two new tables in bpm_cache.db:
- t_artists_processed: marks artists whose albums have been fully scanned
- t_tracks_matched: stores raw (un-normalized) BPM for every resolved track

On the next run, already-processed artists are skipped and their tracks are reloaded from disk, then re-normalized against the current floor/ceiling settings before being inserted into the in-memory playlist DB.

Artists that were mid-flight when interrupted are NOT marked complete and will be fully re-scanned (any partial t_tracks_matched rows are safely overwritten).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 62c54e8 commit 0fc5914

2 files changed

Lines changed: 171 additions & 97 deletions

File tree

main.py

Lines changed: 150 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -179,110 +179,163 @@ def main():
179179

180180
# Find all artists using Search for Genre
181181
match_found = False
182+
stop_early = False
182183

183184
artists_to_process = discover_artists_for_genre(settings, primary_market='BE')
184185
print('Search for artists in genre', settings.genre_searchstring, 'yielded', len(artists_to_process), 'results.')
185186

186187
process = "?"
187-
for artist in artists_to_process:
188-
print('')
189-
print('♫', artist.name.upper())
190-
print('=' * 80)
191-
if settings.interactive_mode:
192-
if process not in ("A", "a"):
193-
process = "?"
194-
while process not in ("", "Y", "N", "C", "A", "y", "n", "c", "a"):
195-
process = input("Process this artist? (Enter = Yes; N = No, skip; C = Cancel, "
196-
"stop processing any more artists, A = Yes to all): ") or "Y"
197-
else:
198-
# all artists are processed in non-interactive mode
199-
process = "A"
200-
if process in ("C", "c"):
201-
break
202-
if process in ("A", "a", "Y", "y"):
203-
results = settings.spotify.artist_albums(artist_id=artist.uri, include_groups='album,single')
204-
albums = results['items']
205-
while results['next']:
206-
results = settings.spotify.next(results)
207-
albums.extend(results['items'])
208-
209-
# Build BPM providers once per artist, not per album
210-
bpm_providers = build_bpm_providers(settings)
211-
212-
for album in albums:
213-
print('◌', album['name'])
214-
while True:
215-
try:
216-
album = settings.spotify.album(album['uri'])
217-
tm.sleep(0.1) # gentle pacing — avoids exhausting Spotify's rate limit
218-
except Exception as ex:
219-
ex_str = str(ex)
220-
retry_match = re.search(r'Retry will occur after:\s*(\d+)', ex_str)
221-
if retry_match:
222-
wait_secs = int(retry_match.group(1))
223-
if wait_secs > 300:
224-
print(f"\n⛔ Spotify rate limit exhausted — retry allowed in "
225-
f"{wait_secs // 3600}h {(wait_secs % 3600) // 60}m ({wait_secs}s).")
226-
print(" The BPM cache preserves all work done so far.")
227-
print(" Run the program again after the wait period.")
228-
settings.sql_cursor.close()
229-
sys.exit(1)
230-
print(f"… Spotify rate limited, waiting {wait_secs}s before retry...")
231-
tm.sleep(wait_secs + 1)
232-
else:
233-
print(f"⚠ {type(ex).__name__}: {ex} — pausing 5s then retrying")
234-
tm.sleep(5)
235-
continue
236-
break
237-
238-
track_number = 0
239-
for track_obj in (album['tracks']['items'] or []):
240-
try:
241-
track_name = track_obj.get('name')
242-
track_uri = track_obj.get('uri')
243-
preview_url = track_obj.get('preview_url')
244-
artist_name = ''
245-
artists_list = track_obj.get('artists') or []
246-
if artists_list:
247-
artist_name = artists_list[0].get('name') or ''
248-
249-
# Fetch BPM — check persistent cache first to avoid re-downloading
250-
cached_row = settings.bpm_cache_cursor.execute(
251-
"SELECT bpm, source FROM t_bpm_cache WHERE track_uri = ?",
252-
(track_uri,)).fetchone()
253-
if cached_row:
254-
bpm_result = BpmResult(bpm=cached_row[0], source=f"{cached_row[1]}/cached")
255-
else:
256-
bpm_result = bpm_providers[0].get_bpm(
257-
artist_name=artist_name, track_name=track_name, preview_url=preview_url)
258-
if bpm_result.bpm is not None:
259-
settings.bpm_cache_cursor.execute(
260-
"INSERT OR REPLACE INTO t_bpm_cache (track_uri, bpm, source) VALUES (?, ?, ?)",
261-
(track_uri, bpm_result.bpm, bpm_result.source))
262-
settings.bpm_cache.commit()
263-
bpm_value = bpm_result.bpm if bpm_result else None
264-
normalized_bpm, norm_status = normalize_bpm_for_settings(bpm_value, settings)
265-
266-
if normalized_bpm is not None:
267-
print(f' ✓ MATCH ♯ {track_name}{normalized_bpm} BPM ({norm_status}) [{bpm_result.source}]')
268-
# Insert into DB if unique URI and unique track name
188+
try:
189+
for artist in artists_to_process:
190+
print('')
191+
print('♫', artist.name.upper())
192+
print('=' * 80)
193+
if settings.interactive_mode:
194+
if process not in ("A", "a"):
195+
process = "?"
196+
while process not in ("", "Y", "N", "C", "A", "y", "n", "c", "a"):
197+
process = input("Process this artist? (Enter = Yes; N = No, skip; C = Cancel, "
198+
"stop processing any more artists, A = Yes to all): ") or "Y"
199+
else:
200+
# all artists are processed in non-interactive mode
201+
process = "A"
202+
if process in ("C", "c"):
203+
break
204+
if process in ("A", "a", "Y", "y"):
205+
# Resumability: check if this artist was fully processed in a previous run.
206+
# If so, reload their matched tracks from disk and re-normalize against current
207+
# settings (floor/ceiling/doubling rules may have changed between runs).
208+
already_done = settings.bpm_cache_cursor.execute(
209+
"SELECT 1 FROM t_artists_processed WHERE artist_uri = ?",
210+
(artist.uri,)).fetchone()
211+
if already_done:
212+
prev_tracks = settings.bpm_cache_cursor.execute(
213+
"SELECT track_uri, track_name, raw_bpm, bpm_source "
214+
"FROM t_tracks_matched WHERE artist_uri = ?",
215+
(artist.uri,)).fetchall()
216+
reloaded = 0
217+
for t_uri, t_name, raw_bpm, bpm_source in prev_tracks:
218+
norm_bpm, _ = normalize_bpm_for_settings(raw_bpm, settings)
219+
if norm_bpm is not None:
269220
cur.execute(
270221
"INSERT OR IGNORE INTO t_tracks (track_uri, track_name, track_bpm)"
271222
" SELECT ?, ?, ?"
272223
" WHERE NOT EXISTS (SELECT * FROM t_tracks WHERE track_name = ?);",
273-
(track_uri, track_name, normalized_bpm, track_name)
274-
)
224+
(t_uri, t_name, norm_bpm, t_name))
275225
match_found = True
276-
elif bpm_value is not None:
277-
if settings.debug:
278-
print(f' ✗ no match ♯ {track_name}{bpm_value:.0f} BPM (out of range) [{bpm_result.source}]')
279-
elif bpm_result and bpm_result.notes == "no preview URL (Spotify or Deezer)":
280-
print(f' – skipped ♯ {track_name} → no preview URL on Spotify or Deezer')
281-
else:
282-
print(f' – skipped ♯ {track_name}{bpm_result.notes if bpm_result else "unknown error"}')
283-
track_number += 1
284-
except Exception as ex:
285-
print(f"⚠ BPM resolution failed for track #{track_number}: {ex}")
226+
reloaded += 1
227+
print(f' ↩ Previously processed — {reloaded} track(s) reloaded from cache.')
228+
continue
229+
230+
results = settings.spotify.artist_albums(artist_id=artist.uri, include_groups='album,single')
231+
albums = results['items']
232+
while results['next']:
233+
results = settings.spotify.next(results)
234+
albums.extend(results['items'])
235+
236+
# Build BPM providers once per artist, not per album
237+
bpm_providers = build_bpm_providers(settings)
238+
239+
for album in albums:
240+
print('◌', album['name'])
241+
while True:
242+
try:
243+
album = settings.spotify.album(album['uri'])
244+
tm.sleep(0.1) # gentle pacing — avoids exhausting Spotify's rate limit
245+
except Exception as ex:
246+
ex_str = str(ex)
247+
retry_match = re.search(r'Retry will occur after:\s*(\d+)', ex_str)
248+
if retry_match:
249+
wait_secs = int(retry_match.group(1))
250+
if wait_secs > 300:
251+
print(f"\n⛔ Spotify rate limit — retry allowed in "
252+
f"{wait_secs // 3600}h {(wait_secs % 3600) // 60}m ({wait_secs}s).")
253+
print(" Saving matched tracks found so far, then exiting.")
254+
stop_early = True
255+
break # exits the 'while True' retry loop
256+
print(f"… Spotify rate limited, waiting {wait_secs}s before retry...")
257+
tm.sleep(wait_secs + 1)
258+
else:
259+
print(f"⚠ {type(ex).__name__}: {ex} — pausing 5s then retrying")
260+
tm.sleep(5)
261+
continue
262+
break
263+
if stop_early:
264+
break # exits the 'for album in albums' loop
265+
266+
track_number = 0
267+
for track_obj in (album['tracks']['items'] or []):
268+
try:
269+
track_name = track_obj.get('name')
270+
track_uri = track_obj.get('uri')
271+
preview_url = track_obj.get('preview_url')
272+
artist_name = ''
273+
artists_list = track_obj.get('artists') or []
274+
if artists_list:
275+
artist_name = artists_list[0].get('name') or ''
276+
277+
# Fetch BPM — check persistent cache first to avoid re-downloading
278+
cached_row = settings.bpm_cache_cursor.execute(
279+
"SELECT bpm, source FROM t_bpm_cache WHERE track_uri = ?",
280+
(track_uri,)).fetchone()
281+
if cached_row:
282+
bpm_result = BpmResult(bpm=cached_row[0], source=f"{cached_row[1]}/cached")
283+
else:
284+
bpm_result = bpm_providers[0].get_bpm(
285+
artist_name=artist_name, track_name=track_name, preview_url=preview_url)
286+
if bpm_result.bpm is not None:
287+
settings.bpm_cache_cursor.execute(
288+
"INSERT OR REPLACE INTO t_bpm_cache (track_uri, bpm, source) VALUES (?, ?, ?)",
289+
(track_uri, bpm_result.bpm, bpm_result.source))
290+
bpm_value = bpm_result.bpm if bpm_result else None
291+
normalized_bpm, norm_status = normalize_bpm_for_settings(bpm_value, settings)
292+
293+
# Persist raw BPM to t_tracks_matched for resumability.
294+
# Stored un-normalized so re-runs with different floor/ceiling can re-evaluate.
295+
if bpm_value is not None:
296+
settings.bpm_cache_cursor.execute(
297+
"INSERT OR REPLACE INTO t_tracks_matched "
298+
"(track_uri, track_name, artist_uri, raw_bpm, bpm_source) "
299+
"VALUES (?, ?, ?, ?, ?)",
300+
(track_uri, track_name, artist.uri, bpm_value, bpm_result.source))
301+
302+
if normalized_bpm is not None:
303+
print(f' ✓ MATCH ♯ {track_name}{normalized_bpm} BPM ({norm_status}) [{bpm_result.source}]')
304+
# Insert into DB if unique URI and unique track name
305+
cur.execute(
306+
"INSERT OR IGNORE INTO t_tracks (track_uri, track_name, track_bpm)"
307+
" SELECT ?, ?, ?"
308+
" WHERE NOT EXISTS (SELECT * FROM t_tracks WHERE track_name = ?);",
309+
(track_uri, track_name, normalized_bpm, track_name)
310+
)
311+
match_found = True
312+
elif bpm_value is not None:
313+
if settings.debug:
314+
print(f' ✗ no match ♯ {track_name}{bpm_value:.0f} BPM (out of range) [{bpm_result.source}]')
315+
elif bpm_result and bpm_result.notes == "no preview URL (Spotify or Deezer)":
316+
print(f' – skipped ♯ {track_name} → no preview URL on Spotify or Deezer')
317+
else:
318+
print(f' – skipped ♯ {track_name}{bpm_result.notes if bpm_result else "unknown error"}')
319+
track_number += 1
320+
except Exception as ex:
321+
print(f"⚠ BPM resolution failed for track #{track_number}: {ex}")
322+
323+
# Mark this artist as fully processed only if we completed all their albums.
324+
# Interrupted artists are NOT marked — they'll be re-processed on the next run,
325+
# with partial t_tracks_matched rows safely overwritten by INSERT OR REPLACE.
326+
if not stop_early:
327+
settings.bpm_cache_cursor.execute(
328+
"INSERT OR REPLACE INTO t_artists_processed (artist_uri, artist_name) "
329+
"VALUES (?, ?)",
330+
(artist.uri, artist.name))
331+
settings.bpm_cache.commit()
332+
333+
if stop_early:
334+
break # exits the 'for artist in artists_to_process' loop
335+
336+
except KeyboardInterrupt:
337+
print("\n\n⚡ Interrupted by user — saving matched tracks found so far...")
338+
stop_early = True
286339

287340
db_result = cur.execute("SELECT COUNT(*) AS count_of_tracks "
288341
"FROM t_tracks t "
@@ -318,9 +371,9 @@ def main():
318371
my_api_limit = 95 # The add tracks to playlist API allows maximum 100 tracks at a time
319372
my_tracks = []
320373
query = """
321-
SELECT t.track_uri FROM t_tracks t
322-
WHERE NOT EXISTS (SELECT 1 FROM t_tracks_in_playlists p
323-
WHERE t.ROWID = p.track_id LIMIT 1)
374+
SELECT t.track_uri FROM t_tracks t
375+
WHERE NOT EXISTS (SELECT 1 FROM t_tracks_in_playlists p
376+
WHERE t.ROWID = p.track_id LIMIT 1)
324377
ORDER BY track_bpm ASC, track_order ASC;
325378
"""
326379
# we're fetching the full list because database manipulations inside the loop

settings.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,27 @@ def __init__(self):
5151
cached_at TEXT NOT NULL DEFAULT (datetime('now'))
5252
)
5353
""")
54+
# t_artists_processed: records which artists have been fully scanned so that
55+
# interrupted runs can skip them on the next invocation.
56+
self.bpm_cache_cursor.execute("""
57+
CREATE TABLE IF NOT EXISTS t_artists_processed (
58+
artist_uri TEXT PRIMARY KEY,
59+
artist_name TEXT NOT NULL,
60+
processed_at TEXT NOT NULL DEFAULT (datetime('now'))
61+
)
62+
""")
63+
# t_tracks_matched: raw (un-normalized) BPM for every track that had a result.
64+
# Raw BPM is stored so that re-runs with different floor/ceiling settings can
65+
# re-normalize without re-downloading the audio.
66+
self.bpm_cache_cursor.execute("""
67+
CREATE TABLE IF NOT EXISTS t_tracks_matched (
68+
track_uri TEXT PRIMARY KEY,
69+
track_name TEXT NOT NULL,
70+
artist_uri TEXT NOT NULL,
71+
raw_bpm REAL,
72+
bpm_source TEXT NOT NULL DEFAULT ''
73+
)
74+
""")
5475
self.bpm_cache.commit()
5576

5677
# prepare spotipy spotify client

0 commit comments

Comments
 (0)