מעריכים מותאמים אישית של TF Lattice

אזהרה: אומדים (Estimators) אינם מומלצים לקוד חדש. אומדים מריצים קוד בסגנון v1.Session, שקשה יותר לכתוב אותו בצורה נכונה ושעלול להתנהג באופן בלתי צפוי, במיוחד בשילוב עם קוד TF 2. אומדים אמנם נכללים בערבויות התאימות שלנו, אך הם לא יקבלו תכונות נוספות, ולא יקבלו תיקונים מלבד תיקונים לפגיעויות אבטחה. לפרטים, עיין במדריך ההעברה.

סקירה כללית

אפשר להשתמש באומדים מותאמים אישית כדי ליצור מודלים מונוטוניים באופן שרירותי באמצעות שכבות TFL. מדריך זה מתאר את השלבים הדרושים ליצירת אומדים כאלה.

הכנה

התקנת חבילת TF Lattice:

# Install the tensorflow-lattice package with pip.
pip install tensorflow-lattice

ייבוא חבילות נדרשות:

import tensorflow as tf

import logging
import numpy as np
import pandas as pd
import sys
import tensorflow_lattice as tfl
from tensorflow import feature_column as fc

from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.head import binary_class_head
# Turn off all logging output: sys.maxsize is higher than every logging level,
# so records of any severity are suppressed.
logging.disable(sys.maxsize)

הורדת מערך הנתונים של UCI Statlog (לב):

# Download the heart CSV and load it into a DataFrame.
csv_file = tf.keras.utils.get_file(
    fname='heart.csv',
    origin='http://storage.googleapis.com/download.tensorflow.org/data/heart.csv')
df = pd.read_csv(csv_file)

# Detach the label column so that `df` holds features only.
target = df.pop('target')

# The first 80% of the rows are used for training, the rest for testing.
train_size = int(len(df) * 0.8)
train_x, test_x = df.iloc[:train_size], df.iloc[train_size:]
train_y, test_y = target.iloc[:train_size], target.iloc[train_size:]

# Notebook-style preview of the first rows of the feature table.
df.head()

הגדרת ערכי ברירת המחדל המשמשים לאימון במדריך זה:

# Learning rate handed to the Adagrad optimizer created in model_fn.
LEARNING_RATE = 0.1
# Batch size used by both the training and the test input functions.
BATCH_SIZE = 128
# Number of epochs the training input function iterates over the training data.
NUM_EPOCHS = 1000

עמודות תכונה

כמו בכל אומד TF אחר, יש להעביר את הנתונים לאומד, בדרך כלל באמצעות input_fn, ולנתח אותם באמצעות FeatureColumns.

# Feature columns used to parse the raw input features.
# - age
# - sex
# - ca        number of major vessels (0-3) colored by fluoroscopy
# - thal      3 = normal; 6 = fixed defect; 7 = reversible defect
# NOTE(review): the numeric thal codes above look like the raw dataset
# encoding, while the vocabulary below uses string values — confirm against
# the CSV contents.
feature_columns = [
    # Numeric feature; -1 is supplied as the column's default value.
    fc.numeric_column('age', default_value=-1),
    # Categorical feature with the integer vocabulary [0, 1].
    fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
    fc.numeric_column('ca'),
    # Categorical feature with a three-entry string vocabulary.
    fc.categorical_column_with_vocabulary_list(
        'thal', ['normal', 'fixed', 'reversible']),
]

שים לב: אין צורך לעטוף תכונות קטגוריאליות בעמודת תכונה צפופה, מכיוון שהשכבה tfl.layers.CategoricalCalibration יכולה לצרוך ישירות אינדקסים של קטגוריות.

יצירת input_fn

כמו בכל אומד אחר, אפשר להשתמש ב-input_fn כדי להזין נתונים למודל לצורך אימון והערכה.

# Training input: shuffled batches of BATCH_SIZE examples drawn from the
# training split, repeated for NUM_EPOCHS epochs.
train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=train_x,
    y=train_y,
    shuffle=True,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    num_threads=1)

# Evaluation input: a single, unshuffled pass over the test split.
test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=test_x,
    y=test_y,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_epochs=1,
    num_threads=1)

יצירת model_fn

ישנן כמה דרכים ליצור אומד מותאם אישית. כאן נבנה model_fn שקוראת למודל Keras על טנזורי הקלט המנותחים. כדי לנתח את תכונות הקלט, אפשר להשתמש ב-tf.feature_column.input_layer, ב-tf.keras.layers.DenseFeatures או ב-tfl.estimators.transform_features. אם תשתמשו באחרון, לא תצטרכו לעטוף תכונות קטגוריאליות בעמודות תכונה צפופות, והטנזורים המתקבלים לא ישורשרו, מה שמקל על השימוש בתכונות בשכבות הכיול.

כדי לבנות מודל, אתה יכול לערבב ולהתאים שכבות TFL או כל שכבות Keras אחרות. כאן אנו יוצרים מודל סריג Keras מכויל משכבות TFL ומטילים מספר מגבלות מונוטוניות. לאחר מכן אנו משתמשים במודל Keras כדי ליצור את האומד המותאם אישית.

def model_fn(features, labels, mode, config):
  """Builds the estimator spec for a calibrated-lattice binary classifier.

  The raw features are parsed with `tfl.estimators.transform_features`, each
  feature goes through its own TFL calibration layer, and the calibrated
  values are concatenated (in lattice-dimension order: age, sex, ca, thal) and
  fed to a single `tfl.layers.Lattice`. The lattice output is used as the
  logits of a `BinaryClassHead`.

  Args:
    features: Input features passed in by the estimator.
    labels: Labels passed in by the estimator.
    mode: A `tf.estimator.ModeKeys` value; the Adagrad optimizer is only
      created for `TRAIN`.
    config: Estimator config; unused and deleted immediately.

  Returns:
    The estimator spec created by `BinaryClassHead.create_estimator_spec`.
  """
  del config
  input_tensors = tfl.estimators.transform_features(features, feature_columns)
  # One Keras input placeholder per parsed feature, keyed by feature name.
  inputs = {
      key: tf.keras.layers.Input(shape=(1,), name=key) for key in input_tensors
  }

  # Lattice dimensions, in order: age, sex, ca, thal.
  lattice_sizes = [3, 2, 2, 2]
  lattice_monotonicities = ['increasing', 'none', 'increasing', 'increasing']
  lattice_input = tf.keras.layers.Concatenate(axis=1)([
      tfl.layers.PWLCalibration(
          input_keypoints=np.linspace(10, 100, num=8, dtype=np.float32),
          # The output range of the calibrator should be the input range of
          # the following lattice dimension.
          output_min=0.0,
          output_max=lattice_sizes[0] - 1.0,
          monotonicity='increasing',
      )(inputs['age']),
      tfl.layers.CategoricalCalibration(
          # Number of categories including any missing/default category.
          num_buckets=2,
          output_min=0.0,
          output_max=lattice_sizes[1] - 1.0,
      )(inputs['sex']),
      tfl.layers.PWLCalibration(
          input_keypoints=[0.0, 1.0, 2.0, 3.0],
          output_min=0.0,
          # 'ca' feeds lattice dimension 2, so its output range must be taken
          # from lattice_sizes[2] (not from the 'age' dimension).
          output_max=lattice_sizes[2] - 1.0,
          # You can specify TFL regularizers as tuple
          # ('regularizer name', l1, l2).
          kernel_regularizer=('hessian', 0.0, 1e-4),
          monotonicity='increasing',
      )(inputs['ca']),
      tfl.layers.CategoricalCalibration(
          num_buckets=3,
          output_min=0.0,
          # 'thal' feeds lattice dimension 3.
          output_max=lattice_sizes[3] - 1.0,
          # Categorical monotonicity can be partial order.
          # (i, j) indicates that we must have output(i) <= output(j).
          # Make sure to set the lattice monotonicity to 'increasing' for this
          # dimension.
          monotonicities=[(0, 1), (0, 2)],
      )(inputs['thal']),
  ])
  output = tfl.layers.Lattice(
      lattice_sizes=lattice_sizes, monotonicities=lattice_monotonicities)(
          lattice_input)

  training = (mode == tf.estimator.ModeKeys.TRAIN)
  model = tf.keras.Model(inputs=inputs, outputs=output)
  logits = model(input_tensors, training=training)

  # An optimizer is only needed when training; eval/predict pass None.
  if training:
    optimizer = optimizers.get_optimizer_instance_v2('Adagrad', LEARNING_RATE)
  else:
    optimizer = None

  head = binary_class_head.BinaryClassHead()
  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      optimizer=optimizer,
      logits=logits,
      trainable_variables=model.trainable_variables,
      update_ops=model.updates)

אימון ואומד

באמצעות model_fn נוכל ליצור אומד ולאמן אותו.

# Build the estimator from model_fn, train it with the training input
# function, then evaluate it with the test input function.
estimator = tf.estimator.Estimator(model_fn=model_fn)
estimator.train(input_fn=train_input_fn)
results = estimator.evaluate(input_fn=test_input_fn)
# Print the 'auc' entry of the evaluation results.
print('AUC: {}'.format(results['auc']))

2021-09-30 20:51:11.094402: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
AUC: 0.5946115255355835

מעריכים מותאמים אישית של TF Lattice קל לארגן דפים בעזרת אוספים אפשר לשמור ולסווג תוכן על סמך ההעדפות שלך.