Frequently Asked Questions
Setting Up Log Recording to the Database
For Pentaho Data Integration logging to be written to the database, so that every step can be monitored daily, you must configure the kettle.properties file, located in the .kettle folder under the home directory of the user who runs Spoon, with the following initial parameters:
KETTLE_CORE_JOBENTRIES_FILE=
KETTLE_LAZY_REPOSITORY=true
KETTLE_DEFAULT_DATE_FORMAT=
KETTLE_JOB_LOG_SCHEMA=log
KETTLE_CHANNEL_LOG_TABLE=channel
KETTLE_TRANS_PAN_JVM_EXIT_CODE=
KETTLE_SPLIT_FIELDS_REMOVE_ENCLOSURE=false
vfs.sftp.userDirIsRoot=false
KETTLE_COMPATIBILITY_PUR_OLD_NAMING_MODE=N
KETTLE_CARTE_JETTY_ACCEPT_QUEUE_SIZE=
KETTLE_MAX_LOGGING_REGISTRY_SIZE=10000
KETTLE_JNDI_ROOT=
KETTLE_DEFAULT_TIMESTAMP_FORMAT=
KETTLE_MAX_LOG_TIMEOUT_IN_MINUTES=1440
KETTLE_SHARED_OBJECTS=
KETTLE_GLOBAL_LOG_VARIABLES_CLEAR_ON_EXPORT=false
KETTLE_METRICS_LOG_TABLE=metrics
KETTLE_BATCHING_ROWSET=N
KETTLE_TRANS_LOG_TABLE=trans
KETTLE_JOBENTRY_LOG_SCHEMA=log
KETTLE_DEFAULT_INTEGER_FORMAT=
KETTLE_METRICS_LOG_DB=DW
KETTLE_CARTE_JETTY_RES_MAX_IDLE_TIME=
KETTLE_PLUGIN_CLASSES=
KETTLE_DEFAULT_BIGNUMBER_FORMAT=
KETTLE_REDIRECT_STDERR=N
KETTLE_FAIL_ON_LOGGING_ERROR=N
KETTLE_ROWSET_GET_TIMEOUT=50
KETTLE_CHANNEL_LOG_DB=DW
KETTLE_METRICS_LOG_SCHEMA=log
KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL=N
KETTLE_ROWSET_PUT_TIMEOUT=50
KETTLE_AGGREGATION_MIN_NULL_IS_VALUED=N
KETTLE_JOB_LOG_TABLE=job
KETTLE_DEFAULT_NUMBER_FORMAT=
KETTLE_DEFAULT_SERVLET_ENCODING=
KETTLE_REDIRECT_STDOUT=N
KETTLE_TRANS_LOG_DB=DW
KETTLE_TRANS_PERFORMANCE_LOG_SCHEMA=log
KETTLE_MAX_LOG_SIZE_IN_LINES=5000
KETTLE_JOBENTRY_LOG_TABLE=job_entry
KETTLE_PLUGIN_PACKAGES=
KETTLE_COMPATIBILITY_TEXT_FILE_OUTPUT_APPEND_NO_HEADER=N
KETTLE_DISABLE_CONSOLE_LOGGING=N
KETTLE_TRANS_PERFORMANCE_LOG_DB=DW
KETTLE_MAX_JOB_TRACKER_SIZE=5000
PENTAHO_METASTORE_FOLDER=
KETTLE_STEP_LOG_TABLE=step
KETTLE_STEP_PERFORMANCE_SNAPSHOT_LIMIT=0
KETTLE_CORE_STEPS_FILE=
KETTLE_STEP_LOG_DB=DW
KETTLE_AGGREGATION_ALL_NULLS_ARE_ZERO=N
KETTLE_LENIENT_STRING_TO_NUMBER_CONVERSION=N
KETTLE_LOG_SIZE_LIMIT=0
KETTLE_PASSWORD_ENCODER_PLUGIN=Kettle
KETTLE_JOB_LOG_DB=DW
KETTLE_TRANS_PERFORMANCE_LOG_TABLE=trans_performance
KETTLE_CHANNEL_LOG_SCHEMA=log
KETTLE_STEP_LOG_SCHEMA=log
KETTLE_JOBENTRY_LOG_DB=DW
KETTLE_CARTE_JETTY_ACCEPTORS=
KETTLE_COMPATIBILITY_MERGE_ROWS_USE_REFERENCE_STREAM_WHEN_IDENTICAL=N
KETTLE_HIDE_DEVELOPMENT_VERSION_WARNING=N
KETTLE_LOG_TAB_REFRESH_PERIOD=1000
KETTLE_SYSTEM_HOSTNAME=
KETTLE_LOG_TAB_REFRESH_DELAY=1000
KETTLE_CARTE_OBJECT_TIMEOUT_MINUTES=1440
KETTLE_LOG_MARK_MAPPINGS=N
KETTLE_MAX_JOB_ENTRIES_LOGGED=5000
KETTLE_COMPATIBILITY_IMPORT_PATH_ADDITION_ON_VARIABLES=N
KETTLE_COMPATIBILITY_DB_IGNORE_TIMEZONE=N
KETTLE_TRANS_LOG_SCHEMA=log
Note that the KETTLE_*_LOG_DB=DW entries above refer to a database connection named DW, which must exist in your Spoon environment, while the KETTLE_*_LOG_SCHEMA and KETTLE_*_LOG_TABLE entries map to the log schema and tables created below. The initial script on the database should be:
create schema st1;
create schema st2;
create schema dw;
create schema log;
create schema dim;

-- Metrics log table --
CREATE TABLE log.metrics
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, METRICS_DATE TIMESTAMP
, METRICS_CODE VARCHAR(255)
, METRICS_DESCRIPTION VARCHAR(255)
, METRICS_SUBJECT VARCHAR(255)
, METRICS_TYPE VARCHAR(255)
, METRICS_VALUE BIGINT
);

-- Step log table --
CREATE TABLE log.step
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY SMALLINT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
);

-- Logging channel log table --
CREATE TABLE log.channel
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, LOGGING_OBJECT_TYPE VARCHAR(255)
, OBJECT_NAME VARCHAR(255)
, OBJECT_COPY VARCHAR(255)
, REPOSITORY_DIRECTORY VARCHAR(255)
, FILENAME VARCHAR(255)
, OBJECT_ID VARCHAR(255)
, OBJECT_REVISION VARCHAR(255)
, PARENT_CHANNEL_ID VARCHAR(255)
, ROOT_CHANNEL_ID VARCHAR(255)
);

-- Step performance log table --
CREATE TABLE log.trans_performance
(
  ID_BATCH INTEGER
, SEQ_NR INTEGER
, LOGDATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INTEGER
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, INPUT_BUFFER_ROWS BIGINT
, OUTPUT_BUFFER_ROWS BIGINT
);

-- Transformation log table --
CREATE TABLE log.trans
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, TRANSNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE TIMESTAMP
, ENDDATE TIMESTAMP
, LOGDATE TIMESTAMP
, DEPDATE TIMESTAMP
, REPLAYDATE TIMESTAMP
, LOG_FIELD TEXT
);
CREATE INDEX IDX_trans_1 ON log.trans(ID_BATCH);
CREATE INDEX IDX_trans_2 ON log.trans(ERRORS, STATUS, TRANSNAME);

-- Job entry log table --
CREATE TABLE log.job_entry
(
  ID_BATCH INTEGER
, CHANNEL_ID VARCHAR(255)
, LOG_DATE TIMESTAMP
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, "RESULT" BOOLEAN
, NR_RESULT_ROWS BIGINT
, NR_RESULT_FILES BIGINT
);
CREATE INDEX IDX_job_entry_1 ON log.job_entry(ID_BATCH);

-- Job log table --
CREATE TABLE log.job
(
  ID_JOB INTEGER
, CHANNEL_ID VARCHAR(255)
, JOBNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE TIMESTAMP
, ENDDATE TIMESTAMP
, LOGDATE TIMESTAMP
, DEPDATE TIMESTAMP
, REPLAYDATE TIMESTAMP
, LOG_FIELD TEXT
);
CREATE INDEX IDX_job_1 ON log.job(ID_JOB);
CREATE INDEX IDX_job_2 ON log.job(ERRORS, STATUS, JOBNAME);
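With the tables in place, daily monitoring can also be done with plain SQL against the log schema. A minimal sketch, using only columns from the script above and PostgreSQL-style date handling (adjust the date functions to your database):

-- Today's job executions, most recent first
SELECT ID_JOB, JOBNAME, STATUS, ERRORS, STARTDATE, ENDDATE
FROM log.job
WHERE LOGDATE >= CURRENT_DATE
ORDER BY STARTDATE DESC;

-- Today's transformation runs that reported errors
SELECT ID_BATCH, TRANSNAME, STATUS, ERRORS, STARTDATE, ENDDATE
FROM log.trans
WHERE LOGDATE >= CURRENT_DATE
  AND ERRORS > 0
ORDER BY STARTDATE DESC;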
Once configured, Pentaho will begin displaying the log in the "Execution History" tab.
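Since the Execution History tab reads from these same log tables, a run's full text log can also be retrieved directly from the LOG_FIELD column. A minimal sketch (the batch id 42 below is hypothetical):

-- Retrieve the full text log of one transformation run
SELECT LOG_FIELD
FROM log.trans
WHERE ID_BATCH = 42;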