Frequently Asked Questions

Setting Up Logging with Database Storage
Last updated 3 year(s) ago

For Pentaho Data Integration (PDI) logging to work by writing to the database, so that every step can be monitored daily, you must configure the kettle.properties file, located in the .kettle folder inside the home directory of the user that runs Spoon, with the following initial parameters:

KETTLE_CORE_JOBENTRIES_FILE=
KETTLE_LAZY_REPOSITORY=true
KETTLE_DEFAULT_DATE_FORMAT=
KETTLE_JOB_LOG_SCHEMA=log
KETTLE_CHANNEL_LOG_TABLE=channel
KETTLE_TRANS_PAN_JVM_EXIT_CODE=
KETTLE_SPLIT_FIELDS_REMOVE_ENCLOSURE=false
vfs.sftp.userDirIsRoot=false
KETTLE_COMPATIBILITY_PUR_OLD_NAMING_MODE=N
KETTLE_CARTE_JETTY_ACCEPT_QUEUE_SIZE=
KETTLE_MAX_LOGGING_REGISTRY_SIZE=10000
KETTLE_JNDI_ROOT=
KETTLE_DEFAULT_TIMESTAMP_FORMAT=
KETTLE_MAX_LOG_TIMEOUT_IN_MINUTES=1440
KETTLE_SHARED_OBJECTS=
KETTLE_GLOBAL_LOG_VARIABLES_CLEAR_ON_EXPORT=false
KETTLE_METRICS_LOG_TABLE=metrics
KETTLE_BATCHING_ROWSET=N
KETTLE_TRANS_LOG_TABLE=trans
KETTLE_JOBENTRY_LOG_SCHEMA=log
KETTLE_DEFAULT_INTEGER_FORMAT=
KETTLE_METRICS_LOG_DB=DW
KETTLE_CARTE_JETTY_RES_MAX_IDLE_TIME=
KETTLE_PLUGIN_CLASSES=
KETTLE_DEFAULT_BIGNUMBER_FORMAT=
KETTLE_REDIRECT_STDERR=N
KETTLE_FAIL_ON_LOGGING_ERROR=N
KETTLE_ROWSET_GET_TIMEOUT=50
KETTLE_CHANNEL_LOG_DB=DW
KETTLE_METRICS_LOG_SCHEMA=log
KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL=N
KETTLE_ROWSET_PUT_TIMEOUT=50
KETTLE_AGGREGATION_MIN_NULL_IS_VALUED=N
KETTLE_JOB_LOG_TABLE=job
KETTLE_DEFAULT_NUMBER_FORMAT=
KETTLE_DEFAULT_SERVLET_ENCODING=
KETTLE_REDIRECT_STDOUT=N
KETTLE_TRANS_LOG_DB=DW
KETTLE_TRANS_PERFORMANCE_LOG_SCHEMA=log
KETTLE_MAX_LOG_SIZE_IN_LINES=5000
KETTLE_JOBENTRY_LOG_TABLE=job_entry
KETTLE_PLUGIN_PACKAGES=
KETTLE_COMPATIBILITY_TEXT_FILE_OUTPUT_APPEND_NO_HEADER=N
KETTLE_DISABLE_CONSOLE_LOGGING=N
KETTLE_TRANS_PERFORMANCE_LOG_DB=DW
KETTLE_MAX_JOB_TRACKER_SIZE=5000
PENTAHO_METASTORE_FOLDER=
KETTLE_STEP_LOG_TABLE=step
KETTLE_STEP_PERFORMANCE_SNAPSHOT_LIMIT=0
KETTLE_CORE_STEPS_FILE=
KETTLE_STEP_LOG_DB=DW
KETTLE_AGGREGATION_ALL_NULLS_ARE_ZERO=N
KETTLE_LENIENT_STRING_TO_NUMBER_CONVERSION=N
KETTLE_LOG_SIZE_LIMIT=0
KETTLE_PASSWORD_ENCODER_PLUGIN=Kettle
KETTLE_JOB_LOG_DB=DW
KETTLE_TRANS_PERFORMANCE_LOG_TABLE=trans_performance
KETTLE_CHANNEL_LOG_SCHEMA=log
KETTLE_STEP_LOG_SCHEMA=log
KETTLE_JOBENTRY_LOG_DB=DW
KETTLE_CARTE_JETTY_ACCEPTORS=
KETTLE_COMPATIBILITY_MERGE_ROWS_USE_REFERENCE_STREAM_WHEN_IDENTICAL=N
KETTLE_HIDE_DEVELOPMENT_VERSION_WARNING=N
KETTLE_LOG_TAB_REFRESH_PERIOD=1000
KETTLE_SYSTEM_HOSTNAME=
KETTLE_LOG_TAB_REFRESH_DELAY=1000
KETTLE_CARTE_OBJECT_TIMEOUT_MINUTES=1440
KETTLE_LOG_MARK_MAPPINGS=N
KETTLE_MAX_JOB_ENTRIES_LOGGED=5000
KETTLE_COMPATIBILITY_IMPORT_PATH_ADDITION_ON_VARIABLES=N
KETTLE_COMPATIBILITY_DB_IGNORE_TIMEZONE=N
KETTLE_TRANS_LOG_SCHEMA=log

The initial script in the database should be the one below. Note that the schema (log) and the table names (trans, step, job, job_entry, channel, metrics, trans_performance) must match the KETTLE_*_LOG_SCHEMA and KETTLE_*_LOG_TABLE values set above, and that DW must be the name of the database connection used for logging:

create schema st1;
create schema st2;
create schema dw;
create schema log;
create schema dim;

-- Metrics log table
--

CREATE TABLE log.metrics
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, METRICS_DATE TIMESTAMP
, METRICS_CODE VARCHAR(255)
, METRICS_DESCRIPTION VARCHAR(255)
, METRICS_SUBJECT VARCHAR(255)
, METRICS_TYPE VARCHAR(255)
, METRICS_VALUE BIGINT
)
;
-- Step log table
--

CREATE TABLE log.step
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY SMALLINT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
)
;
-- Logging channel log table
--

CREATE TABLE log.channel
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, LOGGING_OBJECT_TYPE VARCHAR(255)
, OBJECT_NAME VARCHAR(255)
, OBJECT_COPY VARCHAR(255)
, REPOSITORY_DIRECTORY VARCHAR(255)
, FILENAME VARCHAR(255)
, OBJECT_ID VARCHAR(255)
, OBJECT_REVISION VARCHAR(255)
, PARENT_CHANNEL_ID VARCHAR(255)
, ROOT_CHANNEL_ID VARCHAR(255)
)
;
-- Transformation performance log table (one row per step snapshot)
--

CREATE TABLE log.trans_performance
(
  ID_BATCH INTEGER
, SEQ_NR INTEGER
, LOGDATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INTEGER
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, INPUT_BUFFER_ROWS BIGINT
, OUTPUT_BUFFER_ROWS BIGINT
)
;
-- Transformation log table
--

CREATE TABLE log.trans
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, TRANSNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE TIMESTAMP
, ENDDATE TIMESTAMP
, LOGDATE TIMESTAMP
, DEPDATE TIMESTAMP
, REPLAYDATE TIMESTAMP
, LOG_FIELD TEXT
)
;
CREATE INDEX IDX_trans_1 ON log.trans(ID_BATCH)
;
CREATE INDEX IDX_trans_2 ON log.trans(ERRORS, STATUS, TRANSNAME)
;
-- Job entry log table
--

CREATE TABLE log.job_entry
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, "RESULT" BOOLEAN
, NR_RESULT_ROWS BIGINT
, NR_RESULT_FILES BIGINT
)
;
CREATE INDEX IDX_job_entry_1 ON log.job_entry(ID_BATCH)
;
-- Job log table
--

CREATE TABLE log.job
(
  ID_JOB INTEGER
, CHANNEL_ID VARCHAR(255)
, JOBNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE TIMESTAMP
, ENDDATE TIMESTAMP
, LOGDATE TIMESTAMP
, DEPDATE TIMESTAMP
, REPLAYDATE TIMESTAMP
, LOG_FIELD TEXT
)
;
CREATE INDEX IDX_job_1 ON log.job(ID_JOB)
;
CREATE INDEX IDX_job_2 ON log.job(ERRORS, STATUS, JOBNAME)
;
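
Once the tables exist and a job or transformation has run, the results can be checked straight from the database. A minimal sketch of a daily monitoring query, assuming a PostgreSQL-compatible database (the date arithmetic may need adjusting for other engines):

-- Transformations executed in the last 24 hours, most recent first.
SELECT ID_BATCH, TRANSNAME, STATUS, ERRORS, STARTDATE, ENDDATE
  FROM log.trans
 WHERE LOGDATE >= NOW() - INTERVAL '1 day'
 ORDER BY LOGDATE DESC;

-- Jobs that finished with errors in the same window.
SELECT ID_JOB, JOBNAME, STATUS, ERRORS, STARTDATE, ENDDATE
  FROM log.job
 WHERE LOGDATE >= NOW() - INTERVAL '1 day'
   AND ERRORS > 0
 ORDER BY LOGDATE DESC;

The indexes IDX_trans_2 and IDX_job_2 created above help keep this kind of status/error lookup fast.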

After this configuration, Pentaho will begin displaying the log under "Execution History":

[Screenshot: the Execution History tab in Spoon]
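
The same data shown in the Spoon interface can also be queried per step, which is what enables the daily step-by-step monitoring mentioned at the beginning. A sketch, where 123 is a placeholder for an ID_BATCH taken from log.trans:

-- Row counts and errors per step for a single transformation run.
SELECT STEPNAME, STEP_COPY, LINES_READ, LINES_WRITTEN, LINES_REJECTED, ERRORS
  FROM log.step
 WHERE ID_BATCH = 123
 ORDER BY STEPNAME, STEP_COPY;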
