การสอน TFX Pipeline อย่างง่ายโดยใช้ชุดข้อมูล Penguin

บทช่วยสอนสั้น ๆ เพื่อเรียกใช้ไปป์ไลน์ TFX อย่างง่าย

ในบทช่วยสอนที่ใช้สมุดบันทึกนี้ เราจะสร้างและเรียกใช้ไปป์ไลน์ TFX สำหรับแบบจำลองการจัดประเภทอย่างง่าย ไปป์ไลน์จะประกอบด้วยองค์ประกอบ TFX ที่จำเป็นสามส่วน: ExampleGen, Trainer และ Pusher ไปป์ไลน์ประกอบด้วยเวิร์กโฟลว์ ML ที่น้อยที่สุด เช่น การนำเข้าข้อมูล การฝึกโมเดล และการส่งออกโมเดลที่ได้รับการฝึก

โปรดดูบทความ "ทำความเข้าใจไปป์ไลน์ TFX" (Understanding TFX Pipelines) เพื่อเรียนรู้เพิ่มเติมเกี่ยวกับแนวคิดต่าง ๆ ใน TFX


ก่อนอื่นเราต้องติดตั้งแพ็คเกจ TFX Python และดาวน์โหลดชุดข้อมูลที่เราจะใช้สำหรับโมเดลของเรา

อัพเกรด Pip

เพื่อหลีกเลี่ยงการอัพเกรด Pip ในระบบเมื่อรันในเครื่อง ให้ตรวจสอบว่าเรากำลังทำงานใน Colab แน่นอนว่าระบบในพื้นที่สามารถอัพเกรดแยกกันได้

  import colab
  !pip install --upgrade pip

ติดตั้ง TFX

pip install -U tfx


หากคุณกำลังใช้ Google Colab ในครั้งแรกที่คุณเรียกใช้เซลล์ด้านบน คุณต้องรีสตาร์ทรันไทม์โดยคลิกที่ปุ่ม "RESTART RUNTIME" ด้านบน หรือใช้เมนู "Runtime > Restart runtime ..." นี่เป็นเพราะวิธีที่ Colab โหลดแพ็คเกจ

ตรวจสอบเวอร์ชัน TensorFlow และ TFX

# Report the installed TensorFlow and TFX versions.
import tensorflow as tf
print(f'TensorFlow version: {tf.__version__}')
from tfx import v1 as tfx
print(f'TFX version: {tfx.__version__}')
TensorFlow version: 2.6.2
TFX version: 1.4.0


มีตัวแปรบางตัวที่ใช้กำหนดไปป์ไลน์ คุณสามารถปรับแต่งตัวแปรเหล่านี้ได้ตามต้องการ โดยค่าเริ่มต้น เอาต์พุตทั้งหมดจากไปป์ไลน์จะถูกสร้างขึ้นภายใต้ไดเร็กทอรีปัจจุบัน

import os

# Name of the pipeline; also used as the last path component of every
# output location defined below.
PIPELINE_NAME = "penguin-simple"

# Output directory to store artifacts generated from the pipeline.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)
# Path to a SQLite DB file to use as an MLMD storage.
METADATA_PATH = os.path.join('metadata', PIPELINE_NAME, 'metadata.db')
# Output directory where created models from the pipeline will be exported.
SERVING_MODEL_DIR = os.path.join('serving_model', PIPELINE_NAME)

# The three paths above are relative, so they resolve against the current
# working directory.
from absl import logging
logging.set_verbosity(logging.INFO)  # Set default logging level.


เราจะดาวน์โหลดชุดข้อมูลตัวอย่างเพื่อใช้ในไปป์ไลน์ TFX ของเรา ชุดข้อมูลที่เราใช้คือชุดข้อมูล Palmer Penguins ซึ่งใช้ในตัวอย่าง TFX อื่น ๆ ด้วยเช่นกัน ชุดข้อมูลนี้มีคุณลักษณะเชิงตัวเลขสี่รายการ:


  • culmen_length_mm
  • culmen_depth_mm
  • flipper_length_mm
  • body_mass_g

คุณลักษณะทั้งหมดได้รับการปรับให้เป็นมาตรฐานแล้วให้อยู่ในช่วง [0,1] เราจะสร้างโมเดลการจัดประเภท (classification) ที่ทำนาย species ของนกเพนกวิน

เนื่องจาก TFX ExampleGen อ่านอินพุตจากไดเร็กทอรี เราจึงต้องสร้างไดเร็กทอรีและคัดลอกชุดข้อมูลไปยังไดเร็กทอรี

import urllib.request
import tempfile

# ExampleGen reads its input from a directory, so the CSV is downloaded into
# a fresh temporary directory that holds nothing else.
DATA_ROOT = tempfile.mkdtemp(prefix='tfx-data')  # Create a temporary directory.
_data_url = 'https://raw.githubusercontent.com/tensorflow/tfx/master/tfx/examples/penguin/data/labelled/penguins_processed.csv'
_data_filepath = os.path.join(DATA_ROOT, "data.csv")
# Fetch the CSV over HTTPS; urlretrieve returns a (local_path, headers) tuple.
urllib.request.urlretrieve(_data_url, _data_filepath)
('/tmp/tfx-dataijanq9u3/data.csv', <http.client.HTTPMessage at 0x7f487953d110>)

ดูไฟล์ CSV อย่างรวดเร็ว

head {_data_filepath}

คุณควรจะเห็นค่าห้าค่าในแต่ละแถว โดย species เป็นหนึ่งใน 0, 1 หรือ 2 และคุณลักษณะอื่น ๆ ทั้งหมดควรมีค่าระหว่าง 0 ถึง 1


ไปป์ไลน์ TFX ถูกกำหนดโดยใช้ Python API เราจะกำหนดไปป์ไลน์ซึ่งประกอบด้วยสามองค์ประกอบดังต่อไปนี้

  • CsvExampleGen: อ่านในไฟล์ข้อมูลและแปลงเป็นรูปแบบภายใน TFX เพื่อการประมวลผลต่อไป มีหลาย ExampleGen สำหรับรูปแบบต่างๆ ในบทช่วยสอนนี้ เราจะใช้ CsvExampleGen ซึ่งรับอินพุตไฟล์ CSV
  • Trainer: ฝึกโมเดล ML คอมโพเนนต์ Trainer ต้องใช้โค้ดนิยามโมเดลจากผู้ใช้ คุณสามารถใช้ TensorFlow API เพื่อระบุวิธีการฝึกโมเดลและบันทึกโมเดลไว้ในรูปแบบ SavedModel
  • Pusher: คัดลอกโมเดลที่ฝึกแล้วออกไปนอกไปป์ไลน์ TFX คอมโพเนนต์ Pusher อาจมองได้ว่าเป็นขั้นตอนการนำโมเดล ML ที่ฝึกแล้วไปใช้งานจริง (deployment)

ก่อนกำหนดไปป์ไลน์ เราต้องเขียนโค้ดโมเดลสำหรับคอมโพเนนต์ Trainer ก่อน


เราจะสร้างโมเดล DNN อย่างง่ายสำหรับการจัดหมวดหมู่โดยใช้ TensorFlow Keras API รหัสการฝึกโมเดลนี้จะถูกบันทึกลงในไฟล์แยกต่างหาก

ในบทช่วยสอนนี้เราจะใช้ Generic Trainer ของ TFX ซึ่งรองรับโมเดลที่สร้างด้วย Keras คุณต้องเขียนไฟล์ Python ที่มีฟังก์ชัน run_fn ซึ่งเป็นจุดเริ่มต้น (entrypoint) ของคอมโพเนนต์ Trainer

_trainer_module_file = 'penguin_trainer.py'
%%writefile {_trainer_module_file}

from typing import List
from absl import logging
import tensorflow as tf
from tensorflow import keras
from tensorflow_transform.tf_metadata import schema_utils

from tfx import v1 as tfx
from tfx_bsl.public import tfxio
from tensorflow_metadata.proto.v0 import schema_pb2

# Names of the numeric input columns, in the order they are fed to the model.
_FEATURE_KEYS = [
    'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g'
]
# Name of the label column.
_LABEL_KEY = 'species'


# Since we're not generating or creating a schema, we will instead create
# a feature spec.  Since there are a fairly small number of features this is
# manageable for this dataset.
_FEATURE_SPEC = {
    # Every input feature is a single float per example.
    **{
        feature: tf.io.FixedLenFeature(shape=[1], dtype=tf.float32)
        for feature in _FEATURE_KEYS
    },
    # The label is a single integer class index per example.
    _LABEL_KEY: tf.io.FixedLenFeature(shape=[1], dtype=tf.int64)
}

def _input_fn(file_pattern: List[str],
              data_accessor: tfx.components.DataAccessor,
              schema: schema_pb2.Schema,
              batch_size: int = 200) -> tf.data.Dataset:
  """Generates features and label for training.

  Args:
    file_pattern: List of paths or patterns of input tfrecord files.
    data_accessor: DataAccessor for converting input to RecordBatch.
    schema: schema of the input data.
    batch_size: representing the number of consecutive elements of returned
      dataset to combine in a single batch

  Returns:
    A dataset that contains (features, indices) tuple where features is a
      dictionary of Tensors, and indices is a single Tensor of label indices.
  """
  # `label_key` makes the factory split the label column out of the feature
  # dict, producing (features, label) tuples.
  dataset = data_accessor.tf_dataset_factory(
      file_pattern,
      tfxio.TensorFlowDatasetOptions(
          batch_size=batch_size, label_key=_LABEL_KEY),
      schema=schema)
  # Repeat indefinitely: the caller bounds training/evaluation by a step
  # count, so the dataset must not run out before those steps are done.
  return dataset.repeat()

def _build_keras_model() -> tf.keras.Model:
  """Creates a DNN Keras model for classifying penguin data.

  Returns:
    A compiled Keras Model with one scalar input per entry of
    `_FEATURE_KEYS` and three output logits.
  """
  # The model below is built with Functional API, please refer to
  # https://www.tensorflow.org/guide/keras/overview for all API options.
  inputs = [keras.layers.Input(shape=(1,), name=f) for f in _FEATURE_KEYS]
  d = keras.layers.concatenate(inputs)
  for _ in range(2):
    d = keras.layers.Dense(8, activation='relu')(d)
  # No activation on the last layer: the three outputs are raw logits.
  outputs = keras.layers.Dense(3)(d)

  model = keras.Model(inputs=inputs, outputs=outputs)
  # The loss takes integer class labels and raw logits, matching the int64
  # label column and the activation-free output layer.
  # NOTE(review): the optimizer and its learning rate are not recoverable
  # from this export; Adam(1e-2) follows the upstream tutorial -- confirm.
  model.compile(
      optimizer=keras.optimizers.Adam(1e-2),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[keras.metrics.SparseCategoricalAccuracy()])

  # Route the layer summary through absl logging so it shows up in the
  # pipeline logs.
  model.summary(print_fn=logging.info)
  return model

# NOTE(review): batch sizes are not recoverable from this export; the values
# below follow the upstream tutorial -- confirm.
_TRAIN_BATCH_SIZE = 20
_EVAL_BATCH_SIZE = 10


# TFX Trainer will call this function.
def run_fn(fn_args: tfx.components.FnArgs):
  """Train the model based on given args.

  Builds the train and eval datasets, fits the Keras model for the number of
  steps requested by the Trainer component, and writes the trained model to
  `fn_args.serving_model_dir`.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """

  # This schema is usually either an output of SchemaGen or a manually-curated
  # version provided by pipeline author. A schema can also derived from TFT
  # graph if a Transform component is used. In the case when either is missing,
  # `schema_from_feature_spec` could be used to generate schema from very simple
  # feature_spec, but the schema returned would be very primitive.
  schema = schema_utils.schema_from_feature_spec(_FEATURE_SPEC)

  train_dataset = _input_fn(
      fn_args.train_files,
      fn_args.data_accessor,
      schema,
      batch_size=_TRAIN_BATCH_SIZE)
  eval_dataset = _input_fn(
      fn_args.eval_files,
      fn_args.data_accessor,
      schema,
      batch_size=_EVAL_BATCH_SIZE)

  model = _build_keras_model()
  # Step counts come from the Trainer component's train_args / eval_args.
  model.fit(
      train_dataset,
      steps_per_epoch=fn_args.train_steps,
      validation_data=eval_dataset,
      validation_steps=fn_args.eval_steps)

  # The result of the training should be saved in `fn_args.serving_model_dir`
  # directory.
  model.save(fn_args.serving_model_dir, save_format='tf')
Writing penguin_trainer.py

ตอนนี้คุณได้เสร็จสิ้นขั้นตอนการเตรียมการทั้งหมดเพื่อสร้างไปป์ไลน์ TFX แล้ว


เรากำหนดฟังก์ชันเพื่อสร้างไปป์ไลน์ TFX ออบเจ็กต์ Pipeline แทนไปป์ไลน์ TFX ซึ่งสามารถรันได้โดยใช้ระบบ orchestration ของไปป์ไลน์ระบบใดระบบหนึ่งที่ TFX รองรับ

def _create_pipeline(pipeline_name: str, pipeline_root: str, data_root: str,
                     module_file: str, serving_model_dir: str,
                     metadata_path: str) -> tfx.dsl.Pipeline:
  """Creates a three component penguin pipeline with TFX.

  Args:
    pipeline_name: Name of the pipeline.
    pipeline_root: Directory under which pipeline artifacts are written.
    data_root: Directory containing the input CSV file(s).
    module_file: Path to the Python file that defines `run_fn` for Trainer.
    serving_model_dir: Directory the Pusher copies the trained model into.
    metadata_path: Path of the SQLite file used as the metadata store.

  Returns:
    A Pipeline made of CsvExampleGen, Trainer and Pusher.
  """
  # Brings data into the pipeline.
  example_gen = tfx.components.CsvExampleGen(input_base=data_root)

  # Uses user-provided Python function that trains a model.
  trainer = tfx.components.Trainer(
      module_file=module_file,
      examples=example_gen.outputs['examples'],
      train_args=tfx.proto.TrainArgs(num_steps=100),
      eval_args=tfx.proto.EvalArgs(num_steps=5))

  # Pushes the model to a filesystem destination.
  pusher = tfx.components.Pusher(
      model=trainer.outputs['model'],
      push_destination=tfx.proto.PushDestination(
          filesystem=tfx.proto.PushDestination.Filesystem(
              base_directory=serving_model_dir)))

  # Following three components will be included in the pipeline.
  components = [
      example_gen,
      trainer,
      pusher,
  ]

  return tfx.dsl.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      metadata_connection_config=tfx.orchestration.metadata
      .sqlite_metadata_connection_config(metadata_path),
      components=components)


TFX รองรับ orchestrator หลายตัวในการรันไปป์ไลน์ ในบทช่วยสอนนี้เราจะใช้ LocalDagRunner ซึ่งรวมอยู่ในแพ็กเกจ TFX Python และรันไปป์ไลน์บนสภาพแวดล้อมในเครื่อง เรามักเรียกไปป์ไลน์ TFX ว่า "DAG" ซึ่งย่อมาจาก directed acyclic graph (กราฟแบบมีทิศทางที่ไม่มีวงวน)

LocalDagRunner ช่วยให้ทดลองและปรับแก้ซ้ำได้อย่างรวดเร็วสำหรับการพัฒนา (development) และการดีบัก TFX ยังรองรับ orchestrator อื่น ๆ รวมถึง Kubeflow Pipelines และ Apache Airflow ซึ่งเหมาะสำหรับการใช้งานจริง (production)

ดู "TFX on Cloud AI Platform Pipelines" หรือ "TFX Airflow Tutorial" เพื่อเรียนรู้เพิ่มเติมเกี่ยวกับระบบ orchestration อื่น ๆ

ตอนนี้เราจะสร้าง LocalDagRunner และส่งออบเจ็กต์ Pipeline ที่สร้างจากฟังก์ชันที่เรากำหนดไว้แล้วเข้าไป

ไปป์ไลน์ทำงานโดยตรงและคุณสามารถดูบันทึกสำหรับความคืบหน้าของไปป์ไลน์รวมถึงการฝึกโมเดล ML

INFO:absl:Generating ephemeral wheel package for '/tmpfs/src/temp/docs/tutorials/tfx/penguin_trainer.py' (including modules: ['penguin_trainer']).
INFO:absl:User module package has hash fingerprint version a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.
INFO:absl:Executing: ['/tmpfs/src/tf_docs_env/bin/python', '/tmp/tmp28n_co8j/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmpfb02sbta', '--dist-dir', '/tmp/tmpyu7gi15_']
/tmpfs/src/tf_docs_env/lib/python3.7/site-packages/setuptools/command/install.py:37: SetuptoolsDeprecationWarning: setup.py install is deprecated. Use build and pip and other standards-based tools.
listing git files failed - pretending there aren't any
INFO:absl:Successfully built user code wheel distribution at 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl'; target user module is 'penguin_trainer'.
INFO:absl:Full user module path is 'penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl'
INFO:absl:Using deployment config:
 executor_specs {
  key: "CsvExampleGen"
  value {
    beam_executable_spec {
      python_executor_spec {
        class_path: "tfx.components.example_gen.csv_example_gen.executor.Executor"
executor_specs {
  key: "Pusher"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.pusher.executor.Executor"
executor_specs {
  key: "Trainer"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.trainer.executor.GenericExecutor"
custom_driver_specs {
  key: "CsvExampleGen"
  value {
    python_class_executable_spec {
      class_path: "tfx.components.example_gen.driver.FileBasedDriver"
metadata_connection_config {
  sqlite {
    filename_uri: "metadata/penguin-simple/metadata.db"
    connection_mode: READWRITE_OPENCREATE

INFO:absl:Using connection config:
 sqlite {
  filename_uri: "metadata/penguin-simple/metadata.db"
  connection_mode: READWRITE_OPENCREATE

INFO:absl:Component CsvExampleGen is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.example_gen.csv_example_gen.component.CsvExampleGen"
  id: "CsvExampleGen"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.CsvExampleGen"
outputs {
  outputs {
    key: "examples"
    value {
      artifact_spec {
        type {
          name: "Examples"
          properties {
            key: "span"
            value: INT
          properties {
            key: "split_names"
            value: STRING
          properties {
            key: "version"
            value: INT
parameters {
  parameters {
    key: "input_base"
    value {
      field_value {
        string_value: "/tmp/tfx-dataijanq9u3"
  parameters {
    key: "input_config"
    value {
      field_value {
        string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  parameters {
    key: "output_config"
    value {
      field_value {
        string_value: "{\n  \"split_config\": {\n    \"splits\": [\n      {\n        \"hash_buckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hash_buckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  parameters {
    key: "output_data_format"
    value {
      field_value {
        int_value: 6
  parameters {
    key: "output_file_format"
    value {
      field_value {
        int_value: 5
downstream_nodes: "Trainer"
execution_options {
  caching_options {

INFO:absl:MetadataStore with DB connection initialized
running bdist_wheel
running build
running build_py
creating build
creating build/lib
copying penguin_trainer.py -> build/lib
installing to /tmp/tmpfb02sbta
running install
running install_lib
copying build/lib/penguin_trainer.py -> /tmp/tmpfb02sbta
running install_egg_info
running egg_info
creating tfx_user_code_Trainer.egg-info
writing tfx_user_code_Trainer.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Trainer.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Trainer.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Trainer.egg-info/SOURCES.txt'
Copying tfx_user_code_Trainer.egg-info to /tmp/tmpfb02sbta/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3.7.egg-info
running install_scripts
creating /tmp/tmpfb02sbta/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.dist-info/WHEEL
creating '/tmp/tmpyu7gi15_/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl' and adding '/tmp/tmpfb02sbta' to it
adding 'penguin_trainer.py'
adding 'tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.dist-info/METADATA'
adding 'tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.dist-info/WHEEL'
adding 'tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.dist-info/top_level.txt'
adding 'tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc.dist-info/RECORD'
removing /tmp/tmpfb02sbta
WARNING: Logging before InitGoogleLogging() is written to STDERR
I1205 10:44:07.061197 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1205 10:44:07.067816 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1205 10:44:07.074599 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
I1205 10:44:07.081624 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:select span and version = (0, None)
INFO:absl:latest span and version = (0, None)
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Going to run a new execution 1
I1205 10:44:07.136307 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:Going to run a new execution: ExecutionInfo(execution_id=1, input_dict={}, output_dict=defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "pipelines/penguin-simple/CsvExampleGen/examples/1"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:25648,xor_checksum:1638701046,sum_checksum:1638701046"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:CsvExampleGen:examples:0"
custom_properties {
  key: "span"
  value {
    int_value: 0
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
properties {
  key: "split_names"
  value: STRING
properties {
  key: "version"
  value: INT
)]}), exec_properties={'output_config': '{\n  "split_config": {\n    "splits": [\n      {\n        "hash_buckets": 2,\n        "name": "train"\n      },\n      {\n        "hash_buckets": 1,\n        "name": "eval"\n      }\n    ]\n  }\n}', 'input_base': '/tmp/tfx-dataijanq9u3', 'input_config': '{\n  "splits": [\n    {\n      "name": "single_split",\n      "pattern": "*"\n    }\n  ]\n}', 'output_file_format': 5, 'output_data_format': 6, 'span': 0, 'version': None, 'input_fingerprint': 'split:single_split,num_files:1,total_bytes:25648,xor_checksum:1638701046,sum_checksum:1638701046'}, execution_output_uri='pipelines/penguin-simple/CsvExampleGen/.system/executor_execution/1/executor_output.pb', stateful_working_dir='pipelines/penguin-simple/CsvExampleGen/.system/stateful_working_dir/2021-12-05T10:44:06.706974', tmp_dir='pipelines/penguin-simple/CsvExampleGen/.system/executor_execution/1/.temp/', pipeline_node=node_info {
  type {
    name: "tfx.components.example_gen.csv_example_gen.component.CsvExampleGen"
  id: "CsvExampleGen"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.CsvExampleGen"
outputs {
  outputs {
    key: "examples"
    value {
      artifact_spec {
        type {
          name: "Examples"
          properties {
            key: "span"
            value: INT
          properties {
            key: "split_names"
            value: STRING
          properties {
            key: "version"
            value: INT
parameters {
  parameters {
    key: "input_base"
    value {
      field_value {
        string_value: "/tmp/tfx-dataijanq9u3"
  parameters {
    key: "input_config"
    value {
      field_value {
        string_value: "{\n  \"splits\": [\n    {\n      \"name\": \"single_split\",\n      \"pattern\": \"*\"\n    }\n  ]\n}"
  parameters {
    key: "output_config"
    value {
      field_value {
        string_value: "{\n  \"split_config\": {\n    \"splits\": [\n      {\n        \"hash_buckets\": 2,\n        \"name\": \"train\"\n      },\n      {\n        \"hash_buckets\": 1,\n        \"name\": \"eval\"\n      }\n    ]\n  }\n}"
  parameters {
    key: "output_data_format"
    value {
      field_value {
        int_value: 6
  parameters {
    key: "output_file_format"
    value {
      field_value {
        int_value: 5
downstream_nodes: "Trainer"
execution_options {
  caching_options {
, pipeline_info=id: "penguin-simple"
, pipeline_run_id='2021-12-05T10:44:06.706974')
INFO:absl:Generating examples.
WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies required for Interactive Beam PCollection visualization are not available, please use: `pip install apache-beam[interactive]` to install necessary dependencies to enable all data visualization features.
INFO:absl:Processing input csv data /tmp/tfx-dataijanq9u3/* to TFExample.
WARNING:root:Make sure that locally built Python SDK docker image has Python 3.7 interpreter.
WARNING:apache_beam.io.tfrecordio:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.
INFO:absl:Examples generated.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 1 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "pipelines/penguin-simple/CsvExampleGen/examples/1"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:25648,xor_checksum:1638701046,sum_checksum:1638701046"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:CsvExampleGen:examples:0"
custom_properties {
  key: "span"
  value {
    int_value: 0
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
properties {
  key: "split_names"
  value: STRING
properties {
  key: "version"
  value: INT
)]}) for execution 1
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component CsvExampleGen is finished.
INFO:absl:Component Trainer is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.trainer.component.Trainer"
  id: "Trainer"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.Trainer"
inputs {
  inputs {
    key: "examples"
    value {
      channels {
        producer_node_query {
          id: "CsvExampleGen"
        context_queries {
          type {
            name: "pipeline"
          name {
            field_value {
              string_value: "penguin-simple"
        context_queries {
          type {
            name: "pipeline_run"
          name {
            field_value {
              string_value: "2021-12-05T10:44:06.706974"
        context_queries {
          type {
            name: "node"
          name {
            field_value {
              string_value: "penguin-simple.CsvExampleGen"
        artifact_query {
          type {
            name: "Examples"
        output_key: "examples"
      min_count: 1
outputs {
  outputs {
    key: "model"
    value {
      artifact_spec {
        type {
          name: "Model"
  outputs {
    key: "model_run"
    value {
      artifact_spec {
        type {
          name: "ModelRun"
parameters {
  parameters {
    key: "custom_config"
    value {
      field_value {
        string_value: "null"
  parameters {
    key: "eval_args"
    value {
      field_value {
        string_value: "{\n  \"num_steps\": 5\n}"
  parameters {
    key: "module_path"
    value {
      field_value {
        string_value: "penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl"
  parameters {
    key: "train_args"
    value {
      field_value {
        string_value: "{\n  \"num_steps\": 100\n}"
upstream_nodes: "CsvExampleGen"
downstream_nodes: "Pusher"
execution_options {
  caching_options {

INFO:absl:MetadataStore with DB connection initialized
INFO:absl:MetadataStore with DB connection initialized
I1205 10:44:08.274386 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:Going to run a new execution 2
INFO:absl:Going to run a new execution: ExecutionInfo(execution_id=2, input_dict={'examples': [Artifact(artifact: id: 1
type_id: 15
uri: "pipelines/penguin-simple/CsvExampleGen/examples/1"
properties {
  key: "split_names"
  value {
    string_value: "[\"train\", \"eval\"]"
custom_properties {
  key: "file_format"
  value {
    string_value: "tfrecords_gzip"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:25648,xor_checksum:1638701046,sum_checksum:1638701046"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:CsvExampleGen:examples:0"
custom_properties {
  key: "payload_format"
  value {
    string_value: "FORMAT_TF_EXAMPLE"
custom_properties {
  key: "span"
  value {
    int_value: 0
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
state: LIVE
create_time_since_epoch: 1638701048257
last_update_time_since_epoch: 1638701048257
, artifact_type: id: 15
name: "Examples"
properties {
  key: "span"
  value: INT
properties {
  key: "split_names"
  value: STRING
properties {
  key: "version"
  value: INT
)]}, output_dict=defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model/2"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Trainer:model:0"
, artifact_type: name: "Model"
)], 'model_run': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model_run/2"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Trainer:model_run:0"
, artifact_type: name: "ModelRun"
)]}), exec_properties={'custom_config': 'null', 'module_path': 'penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl', 'train_args': '{\n  "num_steps": 100\n}', 'eval_args': '{\n  "num_steps": 5\n}'}, execution_output_uri='pipelines/penguin-simple/Trainer/.system/executor_execution/2/executor_output.pb', stateful_working_dir='pipelines/penguin-simple/Trainer/.system/stateful_working_dir/2021-12-05T10:44:06.706974', tmp_dir='pipelines/penguin-simple/Trainer/.system/executor_execution/2/.temp/', pipeline_node=node_info {
  type {
    name: "tfx.components.trainer.component.Trainer"
  id: "Trainer"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.Trainer"
inputs {
  inputs {
    key: "examples"
    value {
      channels {
        producer_node_query {
          id: "CsvExampleGen"
        context_queries {
          type {
            name: "pipeline"
          name {
            field_value {
              string_value: "penguin-simple"
        context_queries {
          type {
            name: "pipeline_run"
          name {
            field_value {
              string_value: "2021-12-05T10:44:06.706974"
        context_queries {
          type {
            name: "node"
          name {
            field_value {
              string_value: "penguin-simple.CsvExampleGen"
        artifact_query {
          type {
            name: "Examples"
        output_key: "examples"
      min_count: 1
outputs {
  outputs {
    key: "model"
    value {
      artifact_spec {
        type {
          name: "Model"
  outputs {
    key: "model_run"
    value {
      artifact_spec {
        type {
          name: "ModelRun"
parameters {
  parameters {
    key: "custom_config"
    value {
      field_value {
        string_value: "null"
  parameters {
    key: "eval_args"
    value {
      field_value {
        string_value: "{\n  \"num_steps\": 5\n}"
  parameters {
    key: "module_path"
    value {
      field_value {
        string_value: "penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl"
  parameters {
    key: "train_args"
    value {
      field_value {
        string_value: "{\n  \"num_steps\": 100\n}"
upstream_nodes: "CsvExampleGen"
downstream_nodes: "Pusher"
execution_options {
  caching_options {
, pipeline_info=id: "penguin-simple"
, pipeline_run_id='2021-12-05T10:44:06.706974')
INFO:absl:Train on the 'train' split when train_args.splits is not set.
INFO:absl:Evaluate on the 'eval' split when eval_args.splits is not set.
INFO:absl:udf_utils.get_fn {'custom_config': 'null', 'module_path': 'penguin_trainer@pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl', 'train_args': '{\n  "num_steps": 100\n}', 'eval_args': '{\n  "num_steps": 5\n}'} 'run_fn'
INFO:absl:Installing 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['/tmpfs/src/tf_docs_env/bin/python', '-m', 'pip', 'install', '--target', '/tmp/tmp9yk6w_js', 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl']
Processing ./pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl
INFO:absl:Successfully installed 'pipelines/penguin-simple/_wheels/tfx_user_code_Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc-py3-none-any.whl'.
INFO:absl:Training model.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
. Setting to DenseTensor.
Installing collected packages: tfx-user-code-Trainer
Successfully installed tfx-user-code-Trainer-0.0+a7e2e8dccbb913b74904edeec5549d868a2ea392bcd84fbc1965aba698dce3fc
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature body_mass_g has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_depth_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature culmen_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature flipper_length_mm has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Feature species has a shape dim {
  size: 1
. Setting to DenseTensor.
INFO:absl:Model: "model"
INFO:absl:Layer (type)                    Output Shape         Param #     Connected to                     
INFO:absl:culmen_length_mm (InputLayer)   [(None, 1)]          0                                            
INFO:absl:culmen_depth_mm (InputLayer)    [(None, 1)]          0                                            
INFO:absl:flipper_length_mm (InputLayer)  [(None, 1)]          0                                            
INFO:absl:body_mass_g (InputLayer)        [(None, 1)]          0                                            
INFO:absl:concatenate (Concatenate)       (None, 4)            0           culmen_length_mm[0][0]           
INFO:absl:                                                                 culmen_depth_mm[0][0]            
INFO:absl:                                                                 flipper_length_mm[0][0]          
INFO:absl:                                                                 body_mass_g[0][0]                
INFO:absl:dense (Dense)                   (None, 8)            40          concatenate[0][0]                
INFO:absl:dense_1 (Dense)                 (None, 8)            72          dense[0][0]                      
INFO:absl:dense_2 (Dense)                 (None, 3)            27          dense_1[0][0]                    
INFO:absl:Total params: 139
INFO:absl:Trainable params: 139
INFO:absl:Non-trainable params: 0
100/100 [==============================] - 1s 3ms/step - loss: 0.4074 - sparse_categorical_accuracy: 0.8755 - val_loss: 0.0760 - val_sparse_categorical_accuracy: 0.9800
2021-12-05 10:44:13.263941: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
INFO:tensorflow:Assets written to: pipelines/penguin-simple/Trainer/model/2/Format-Serving/assets
INFO:tensorflow:Assets written to: pipelines/penguin-simple/Trainer/model/2/Format-Serving/assets
INFO:absl:Training complete. Model written to pipelines/penguin-simple/Trainer/model/2/Format-Serving. ModelRun written to pipelines/penguin-simple/Trainer/model_run/2
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 2 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model/2"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Trainer:model:0"
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
, artifact_type: name: "Model"
)], 'model_run': [Artifact(artifact: uri: "pipelines/penguin-simple/Trainer/model_run/2"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Trainer:model_run:0"
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
, artifact_type: name: "ModelRun"
)]}) for execution 2
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Trainer is finished.
I1205 10:44:13.795414 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:Component Pusher is running.
I1205 10:44:13.799805 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
  id: "Pusher"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.Pusher"
inputs {
  inputs {
    key: "model"
    value {
      channels {
        producer_node_query {
          id: "Trainer"
        context_queries {
          type {
            name: "pipeline"
          name {
            field_value {
              string_value: "penguin-simple"
        context_queries {
          type {
            name: "pipeline_run"
          name {
            field_value {
              string_value: "2021-12-05T10:44:06.706974"
        context_queries {
          type {
            name: "node"
          name {
            field_value {
              string_value: "penguin-simple.Trainer"
        artifact_query {
          type {
            name: "Model"
        output_key: "model"
outputs {
  outputs {
    key: "pushed_model"
    value {
      artifact_spec {
        type {
          name: "PushedModel"
parameters {
  parameters {
    key: "custom_config"
    value {
      field_value {
        string_value: "null"
  parameters {
    key: "push_destination"
    value {
      field_value {
        string_value: "{\n  \"filesystem\": {\n    \"base_directory\": \"serving_model/penguin-simple\"\n  }\n}"
upstream_nodes: "Trainer"
execution_options {
  caching_options {

INFO:absl:MetadataStore with DB connection initialized
I1205 10:44:13.821346 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Going to run a new execution 3
INFO:absl:Going to run a new execution: ExecutionInfo(execution_id=3, input_dict={'model': [Artifact(artifact: id: 2
type_id: 17
uri: "pipelines/penguin-simple/Trainer/model/2"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Trainer:model:0"
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
state: LIVE
create_time_since_epoch: 1638701053803
last_update_time_since_epoch: 1638701053803
, artifact_type: id: 17
name: "Model"
)]}, output_dict=defaultdict(<class 'list'>, {'pushed_model': [Artifact(artifact: uri: "pipelines/penguin-simple/Pusher/pushed_model/3"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Pusher:pushed_model:0"
, artifact_type: name: "PushedModel"
)]}), exec_properties={'push_destination': '{\n  "filesystem": {\n    "base_directory": "serving_model/penguin-simple"\n  }\n}', 'custom_config': 'null'}, execution_output_uri='pipelines/penguin-simple/Pusher/.system/executor_execution/3/executor_output.pb', stateful_working_dir='pipelines/penguin-simple/Pusher/.system/stateful_working_dir/2021-12-05T10:44:06.706974', tmp_dir='pipelines/penguin-simple/Pusher/.system/executor_execution/3/.temp/', pipeline_node=node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
  id: "Pusher"
contexts {
  contexts {
    type {
      name: "pipeline"
    name {
      field_value {
        string_value: "penguin-simple"
  contexts {
    type {
      name: "pipeline_run"
    name {
      field_value {
        string_value: "2021-12-05T10:44:06.706974"
  contexts {
    type {
      name: "node"
    name {
      field_value {
        string_value: "penguin-simple.Pusher"
inputs {
  inputs {
    key: "model"
    value {
      channels {
        producer_node_query {
          id: "Trainer"
        context_queries {
          type {
            name: "pipeline"
          name {
            field_value {
              string_value: "penguin-simple"
        context_queries {
          type {
            name: "pipeline_run"
          name {
            field_value {
              string_value: "2021-12-05T10:44:06.706974"
        context_queries {
          type {
            name: "node"
          name {
            field_value {
              string_value: "penguin-simple.Trainer"
        artifact_query {
          type {
            name: "Model"
        output_key: "model"
outputs {
  outputs {
    key: "pushed_model"
    value {
      artifact_spec {
        type {
          name: "PushedModel"
parameters {
  parameters {
    key: "custom_config"
    value {
      field_value {
        string_value: "null"
  parameters {
    key: "push_destination"
    value {
      field_value {
        string_value: "{\n  \"filesystem\": {\n    \"base_directory\": \"serving_model/penguin-simple\"\n  }\n}"
upstream_nodes: "Trainer"
execution_options {
  caching_options {
, pipeline_info=id: "penguin-simple"
, pipeline_run_id='2021-12-05T10:44:06.706974')
WARNING:absl:Pusher is going to push the model without validation. Consider using Evaluator or InfraValidator in your pipeline.
INFO:absl:Model version: 1638701053
INFO:absl:Model written to serving path serving_model/penguin-simple/1638701053.
INFO:absl:Model pushed to pipelines/penguin-simple/Pusher/pushed_model/3.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 3 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'pushed_model': [Artifact(artifact: uri: "pipelines/penguin-simple/Pusher/pushed_model/3"
custom_properties {
  key: "name"
  value {
    string_value: "penguin-simple:2021-12-05T10:44:06.706974:Pusher:pushed_model:0"
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.4.0"
, artifact_type: name: "PushedModel"
)]}) for execution 3
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Component Pusher is finished.
I1205 10:44:13.851651 30480 rdbms_metadata_access_object.cc:686] No property is defined for the Type

คุณควรเห็นข้อความ "INFO:absl:Component Pusher is finished." ที่ส่วนท้ายของบันทึกหากไปป์ไลน์ทำงานเสร็จสมบูรณ์ เพราะคอมโพเนนต์ Pusher เป็นคอมโพเนนต์สุดท้ายของไปป์ไลน์

คอมโพเนนต์ Pusher จะส่งโมเดลที่ฝึกแล้วไปยัง SERVING_MODEL_DIR ซึ่งก็คือไดเรกทอรี serving_model/penguin-simple หากคุณไม่ได้เปลี่ยนตัวแปรในขั้นตอนก่อนหน้านี้ คุณสามารถดูผลลัพธ์ได้จากเบราว์เซอร์ไฟล์ในแผงด้านซ้ายของ Colab หรือใช้คำสั่งต่อไปนี้:

# List files in created model directory.
!find {SERVING_MODEL_DIR}


คุณสามารถค้นหาแหล่งข้อมูลเพิ่มเติมได้ที่ https://www.tensorflow.org/tfx/tutorials

โปรดดู "ทำความเข้าใจไปป์ไลน์ TFX" (Understanding TFX Pipelines) เพื่อเรียนรู้เพิ่มเติมเกี่ยวกับแนวคิดต่าง ๆ ใน TFX