@@ -323,15 +323,18 @@ def __init__(self,
323323 event_logger = None ,
324324 monitor = None ,
325325 data_obj = None ,
326- artifact_cache = None ):
326+ artifact_cache = None ,
327+ allow_unsuccessful = False ):
327328 if run_id == 'data' :
328329 raise DataException ("Run ID 'data' is reserved. "
329330 "Try with a different --run-id." )
330331 if self .datastore_root is None :
331332 raise DataException ("Datastore root not found. "
332333 "Specify with METAFLOW_DATASTORE_SYSROOT_%s "
333334 "environment variable." % self .TYPE .upper ())
334-
335+ # NOTE: calling __init__(mode='w') should be a cheap operation:
336+ # no file system accesses are allowed. It is called frequently
337+ # e.g. to resolve log file location.
335338 self .event_logger = event_logger if event_logger else NullEventLogger ()
336339 self .monitor = monitor if monitor else NullMonitor ()
337340 self .metadata = metadata
@@ -356,14 +359,7 @@ def __init__(self,
356359 task_id )
357360
358361 self .attempt = attempt
359- if mode == 'w' :
360- if run_id is not None :
361- # run_id may be None when datastore is used to save
362- # things not related to runs, e.g. the job package
363- self .save_metadata ('attempt' , {'time' : time .time ()})
364- self .objects = {}
365- self .info = {}
366- elif mode == 'r' :
362+ if mode == 'r' :
367363 if data_obj is None :
368364 # what is the latest attempt ID of this data store?
369365
@@ -389,25 +385,37 @@ def __init__(self,
389385 self .attempt = i
390386
391387 # was the latest attempt completed successfully?
392- if not self .is_done ():
388+ if self .is_done ():
389+ # load the data from the latest attempt
390+ data_obj = self .load_metadata ('data' )
391+ elif allow_unsuccessful and self .attempt is not None :
392+ # this mode can be used to load_logs, for instance
393+ data_obj = None
394+ else :
393395 raise DataException ("Data was not found or not finished at %s" \
394396 % self .root )
395397
396- # load the data from the latest attempt
397- data_obj = self .load_metadata ('data' )
398-
399- self .origin = data_obj .get ('origin' )
400- self .objects = data_obj ['objects' ]
401- self .info = data_obj .get ('info' , {})
398+ if data_obj :
399+ self .origin = data_obj .get ('origin' )
400+ self .objects = data_obj ['objects' ]
401+ self .info = data_obj .get ('info' , {})
402402 elif mode == 'd' :
403403 # Direct access mode used by the client. We effectively don't load any
404404 # objects and can only access things using the load_* functions
405405 self .origin = None
406406 self .objects = None
407407 self .info = None
408- else :
408+ elif mode != 'w' :
409409 raise DataException ('Unknown datastore mode: %s' % mode )
410410
def init_task(self):
    """Prepare the datastore for task-related writes.

    Call this once after the datastore has been opened for
    task-related write operations. It stores the current time under
    the 'attempt' metadata name via `save_metadata`, then resets the
    in-memory `objects` and `info` mappings to empty dicts.
    """
    # Record when this attempt started before touching in-memory state.
    attempt_record = {'time': time.time()}
    self.save_metadata('attempt', attempt_record)
    # Two distinct empty dicts, assigned left to right.
    self.objects, self.info = {}, {}
417+
418+
@property
def pathspec(self):
    """Return the slash-separated '<run_id>/<step_name>/<task_id>' string."""
    components = (self.run_id, self.step_name, self.task_id)
    return '%s/%s/%s' % components
0 commit comments