Helper class to handle metadata I/O.
tfx.orchestration.metadata.Metadata(
connection_config: tfx.orchestration.metadata.ConnectionConfigType
) -> None
Used in the notebooks
Attributes |
store
|
Returns underlying MetadataStore.
|
Methods
get_artifacts_by_type
View source
get_artifacts_by_type(
type_name: Text
) -> List[metadata_store_pb2.Artifact]
Fetches artifacts given artifact type name.
get_artifacts_by_uri
View source
get_artifacts_by_uri(
uri: Text
) -> List[metadata_store_pb2.Artifact]
Fetches artifacts given uri.
get_cached_outputs
View source
get_cached_outputs(
input_artifacts: Dict[Text, List[Artifact]],
exec_properties: Dict[Text, Any],
pipeline_info: tfx.orchestration.data_types.PipelineInfo,
component_info: tfx.orchestration.data_types.ComponentInfo
) -> Optional[Dict[Text, List[Artifact]]]
Fetches cached output artifacts if any.
Returns the output artifacts of a cached execution if any. An eligible
cached execution must take the same input artifacts and execution properties,
and be associated with the same pipeline context.
Args |
input_artifacts
|
inputs used by the run.
|
exec_properties
|
execution properties used by the run.
|
pipeline_info
|
info of the current pipeline run.
|
component_info
|
info of the current component.
|
Returns |
Dict of cached output artifacts if eligible cached execution is found.
Otherwise, return None.
|
get_component_run_context
View source
get_component_run_context(
component_info: tfx.orchestration.data_types.ComponentInfo
) -> Optional[metadata_store_pb2.Context]
Gets the context for the component run.
Args |
component_info
|
component information for the current component run.
|
Returns |
a matched context or None
|
get_pipeline_context
View source
get_pipeline_context(
pipeline_info: tfx.orchestration.data_types.PipelineInfo
) -> Optional[metadata_store_pb2.Context]
Gets the context for the pipeline.
Args |
pipeline_info
|
pipeline information for the current pipeline run.
|
Returns |
a matched context or None
|
get_pipeline_run_context
View source
get_pipeline_run_context(
pipeline_info: tfx.orchestration.data_types.PipelineInfo
) -> Optional[metadata_store_pb2.Context]
Gets the context for the pipeline run.
Args |
pipeline_info
|
pipeline information for the current pipeline run.
|
Returns |
a matched context or None
|
get_published_artifacts_by_type_within_context
View source
get_published_artifacts_by_type_within_context(
type_names: List[Text],
context_id: int
) -> Dict[Text, List[metadata_store_pb2.Artifact]]
Fetches artifacts given a list of artifact type names and a context id.
get_qualified_artifacts
View source
get_qualified_artifacts(
contexts: List[metadata_store_pb2.Context],
type_name: Text,
producer_component_id: Optional[Text] = None,
output_key: Optional[Text] = None
) -> List[metadata_store_service_pb2.ArtifactAndType]
Gets qualified artifacts that have the right producer info.
Args |
contexts
|
context constraints to filter artifacts
|
type_name
|
type constraint to filter artifacts
|
producer_component_id
|
producer constraint to filter artifacts
|
output_key
|
output key constraint to filter artifacts
|
Returns |
A list of ArtifactAndType, containing qualified artifacts.
|
publish_artifacts
View source
publish_artifacts(
tfx_artifact_list: List[tfx.types.Artifact]
) -> None
Publishes artifacts to MLMD.
This call will also update the original tfx artifact list to contain the
artifact type info and artifact id.
Args |
tfx_artifact_list
|
A list of tfx.types.Artifact which will be updated
|
publish_execution
View source
publish_execution(
component_info: tfx.orchestration.data_types.ComponentInfo,
output_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
exec_properties: Optional[Dict[Text, Any]] = None
) -> None
Publishes an execution with input and output artifacts info.
This method will publish any execution with non-final states. It will
register unseen artifacts and publish events for them.
Args |
component_info
|
component information.
|
output_artifacts
|
output artifacts produced by the execution.
|
exec_properties
|
execution properties for the execution to be published.
|
register_execution
View source
register_execution(
pipeline_info: tfx.orchestration.data_types.PipelineInfo,
component_info: tfx.orchestration.data_types.ComponentInfo,
contexts: List[metadata_store_pb2.Context],
exec_properties: Optional[Dict[Text, Any]] = None,
input_artifacts: Optional[Dict[Text, List[Artifact]]] = None
) -> metadata_store_pb2.Execution
Registers a new execution in metadata.
Args |
pipeline_info
|
pipeline info of the execution.
|
component_info
|
component info of the execution.
|
contexts
|
contexts for current run, all contexts will be linked to the
execution. In addition, a component run context will be added to the
contexts list.
|
exec_properties
|
the execution properties of the execution.
|
input_artifacts
|
input artifacts of the execution.
|
Returns |
the newly registered execution.
|
register_pipeline_contexts_if_not_exists
View source
register_pipeline_contexts_if_not_exists(
pipeline_info: tfx.orchestration.data_types.PipelineInfo
) -> List[metadata_store_pb2.Context]
Creates or fetches the pipeline contexts needed for the run.
There are two potential contexts:
- Context for the pipeline.
- Context for the current pipeline run. This is optional, only available
when run_id is specified.
Args |
pipeline_info
|
pipeline information for current run.
|
Returns |
a list (of size one or two) of context.
|
search_artifacts
View source
search_artifacts(
artifact_name: Text,
pipeline_info: tfx.orchestration.data_types.PipelineInfo,
producer_component_id: Text
) -> List[tfx.types.Artifact]
Searches for artifacts that match the given info.
Args |
artifact_name
|
the name of the artifact that is set by the producer component.
The name is logged both in artifacts and in the events when the execution
is being published.
|
pipeline_info
|
the information of the current pipeline
|
producer_component_id
|
the id of the component that produces the artifact
|
Returns |
A list of Artifacts that match the given info
|
Raises |
RuntimeError
|
when no matching execution is found given producer info.
|
update_artifact_state
View source
update_artifact_state(
artifact: metadata_store_pb2.Artifact,
new_state: Text
) -> None
Update the state of a given artifact.
update_execution
View source
update_execution(
execution: metadata_store_pb2.Execution,
component_info: tfx.orchestration.data_types.ComponentInfo,
input_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
output_artifacts: Optional[Dict[Text, List[Artifact]]] = None,
exec_properties: Optional[Dict[Text, Any]] = None,
execution_state: Optional[Text] = None,
artifact_state: Optional[Text] = None,
contexts: Optional[List[metadata_store_pb2.Context]] = None
) -> None
Updates the given execution in MLMD based on given information.
All artifacts provided will be registered if not already. Registered id will
be reflected inline.
Args |
execution
|
the execution to be updated. It is required that the execution
passed in has an id.
|
component_info
|
the information of the current running component
|
input_artifacts
|
artifacts to be declared as inputs of the execution
|
output_artifacts
|
artifacts to be declared as outputs of the execution
|
exec_properties
|
execution properties of the execution
|
execution_state
|
state the execution is to be updated to
|
artifact_state
|
state the artifacts are to be updated to
|
contexts
|
a list of contexts the execution and artifacts are to be linked to
|
Raises |
RuntimeError
|
if the execution to be updated has no id.
|
__enter__
View source
__enter__() -> "Metadata"
__exit__
View source
__exit__(
exc_type: Optional[Type[Exception]],
exc_value: Optional[Exception],
exc_tb: Optional[types.TracebackType]
) -> None