ppc64-diag
Data Structures | Macros | Functions | Variables
rtas_errd.h File Reference

Main header for rtas_errd. More...

#include <signal.h>
#include <librtasevent.h>
#include <servicelog-1/servicelog.h>
#include "fru_prev6.h"
#include "config.h"

Go to the source code of this file.

Data Structures

struct  errdata
 
struct  diag_vpd
 
struct  event
 struct to track and handle RTAS events in rtas_errd. More...
 

Macros

#define RTAS_ERRD_ARGS   "dh"
 standard args for rtas_errd More...
 
#define RTAS_ERROR_LOG_MAX   4096
 
#define ADDL_TEXT_MAX   256
 
#define MAX(x, y)   ((x) > (y) ? (x) : (y))
 
#define TRUE   1
 
#define FALSE   0
 
#define RE_SCANLOG_AVAIL   0x00000001
 
#define RE_SERVICEABLE   0x00000002
 
#define RE_PLATDUMP_AVAIL   0x00000004
 
#define RE_PREDICTIVE   0x00000008
 
#define RE_HMC_TAGGED   0x40000000
 
#define RE_ALREADY_REPORTED   0x20000000
 
#define RE_RECOVERED_ERROR   0x10000000
 
#define dbg(_f, _a...)   _dbg("%s(): "_f, __FUNCTION__, ##_a)
 

Functions

void log_msg (struct event *, char *,...)
 Log messages to rtas_errd_log. More...
 
void cfg_log (char *,...)
 dummy interface for calls to diag_cfg More...
 
int init_files (void)
 Initialize files used by rtas_errd. More...
 
void close_files (void)
 Close all the files used by rtas_errd. More...
 
void _dbg (const char *,...)
 Write debug messages to stdout. More...
 
int print_rtas_event (struct event *)
 Print an RTAS event to the platform log. More...
 
int platform_log_write (char *,...)
 Write messages to the platform log. More...
 
void update_epow_status_file (int)
 Update the epow status file. More...
 
int read_proc_error_log (char *, int)
 Read data from proc_error_log. More...
 
void check_scanlog_dump (void)
 Check for new scanlog dumps. More...
 
void check_platform_dump (struct event *)
 Check RTAS event for a platform dump. More...
 
void check_eeh (struct event *)
 Check an RTAS event for EEH event notification. More...
 
void handle_resource_dealloc (struct event *)
 Parse RTAS event for CPU guard information. More...
 
int handle_rtas_event (struct event *)
 Main routine for processing RTAS events. More...
 
void update_rtas_msgs (void)
 Update RTAS messages in the platform log. More...
 
int process_pre_v6 (struct event *)
 Handle older (pre-v6) style events. More...
 
int get_error_fmt (struct event *)
 Extract the error log format indicator from the chrp error log. More...
 
int process_v6 (struct event *)
 
char * get_dt_status (char *)
 
char * diag_get_fru_pn (struct event *, char *)
 
void free_diag_vpd (struct event *)
 
int menugoal (struct event *, char *)
 
void epow_timer_handler (int, siginfo_t, void *)
 Routine to handle SIGALRM timer interrupts. More...
 
int check_epow (struct event *)
 Check an RTAS event for EPOW data. More...
 
time_t get_event_date (struct event *event)
 Retrieve the timestamp from an event. More...
 
int servicelog_sev (int rtas_sev)
 convert RTAS severity to servicelog severity More...
 
void add_callout (struct event *event, char pri, int type, char *proc, char *loc, char *pn, char *sn, char *ccin)
 Add a new FRU callout to the list for this event. More...
 
void log_event (struct event *)
 log the event in the servicelog DB More...
 
void sighup_handler (int, siginfo_t, void *)
 signal handler for SIGHUP More...
 
void restore_sigchld_default (void)
 restore child signal handler More...
 
void setup_sigchld_handler (void)
 setup child signal handler More...
 
void handle_prrn_event (struct event *)
 
void handle_hotplug_event (struct event *)
 

Variables

char * platform_log
 
char * messages_log
 
char * proc_error_log1
 File to read RTAS events from. More...
 
char * proc_error_log2
 Alternate file to read RTAS events from. More...
 
char * rtas_errd_log
 Message log for the rtas_errd daemon. More...
 
char * rtas_errd_log0
 Saved ("rolled over") messages log for rtas_errd daemon. More...
 
char * test_file
 
int platform_log_fd
 
int debug
 Debug level to run at for rtas_errd daemon. More...
 
char * scanlog
 buffer to hold scanlog dump path More...
 
struct servicelog * slog
 servicelog struct for libservicelog use More...
 
char * epow_status_file
 

Detailed Description

Copyright (C) 2004 IBM Corporation.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Macro Definition Documentation

#define ADDL_TEXT_MAX   256
#define dbg (   _f,
  _a... 
)    _dbg("%s(): "_f, __FUNCTION__, ##_a)
#define FALSE   0

Referenced by guard_memlmb(), and process_pre_v6().

#define MAX (   x,
  y 
)    ((x) > (y) ? (x) : (y))

Referenced by find_event().

#define RE_ALREADY_REPORTED   0x20000000

Referenced by handle_rtas_event().

#define RE_HMC_TAGGED   0x40000000
#define RE_PLATDUMP_AVAIL   0x00000004

Referenced by check_platform_dump(), and log_event().

#define RE_PREDICTIVE   0x00000008

Referenced by handle_rtas_event().

#define RE_RECOVERED_ERROR   0x10000000

Referenced by handle_rtas_event().

#define RE_SCANLOG_AVAIL   0x00000001
#define RE_SERVICEABLE   0x00000002
#define RTAS_ERRD_ARGS   "dh"

Referenced by main().

#define RTAS_ERROR_LOG_MAX   4096
#define TRUE   1

Referenced by process_pre_v6().

Function Documentation

void _dbg ( const char *  fmt,
  ... 
)

dbg Provide utility to print debug statements if the debug flag is specified.

Parameters
fmt: format string a la printf()
...: args a la printf()

References debug, reformat_msg(), and RTAS_ERROR_LOG_MAX.

Referenced by _log_msg().

void add_callout ( struct event *  event,
char  pri,
int  type,
char *  proc,
char *  loc,
char *  pn,
char *  sn,
char *  ccin 
)

add_callout

Parameters
event: event to which to add the callout
pri: priority
type: type
proc: procedure ID
loc: location code
pn: FRU part number
sn: FRU serial number
ccin: FRU ccin

References log_msg().

Referenced by add_more_descrs(), report_src(), and set_srn_and_callouts().

void cfg_log ( char *  fmt,
  ... 
)

cfg_log

Parameters
fmt: formatted string a la printf()
...: additional args a la printf()

References _log_msg().

Referenced by main(), read_rtas_events(), and sighup_handler().

void check_eeh ( struct event *  event)

check_eeh Parse an RTAS event to see if this is an EEH event notification. If so, then update the platform log file with additional information about the EEH event.

Parameters
event: pointer to the RTAS event

References event::addl_text, ADDL_TEXT_MAX, event_desc::desc, log_msg(), platform_log_write(), event::rtas_event, and event_desc::src_code.

Referenced by handle_rtas_event().

int check_epow ( struct event *  event)

check_epow Parses error information to determine if it represents an EPOW event. If it is, the epow_status_file is updated with the appropriate condition, and EPOW_PROGRAM is invoked to take the appropriate system action (shutdown, etc).

Parameters
event: pointer to the RTAS event

References EPOW_PROGRAM, EPOW_PROGRAM_NOPATH, log_msg(), parse_epow(), and update_epow_status_file().

Referenced by handle_rtas_event().

void check_platform_dump ( struct event *  event)

check_platform_dump Parses error information to determine if it indicates the availability of a platform dump. The platform dump is copied to the filesystem, and the error log is updated to indicate the path to the dump.

This should be invoked before the error information is written to LOG_FILE, because the error may need to be updated with the path to the dump.

Parameters
event: pointer to struct event

References event::addl_text, ADDL_TEXT_MAX, d_cfg, dbg, DUMP_MAX_FNAME_LEN, EXTRACT_PLATDUMP_CMD, log_msg(), ppc64_diag_config::platform_dump_path, platform_log_write(), RE_PLATDUMP_AVAIL, restore_sigchld_default(), event::rtas_event, and setup_sigchld_handler().

Referenced by handle_rtas_event().

void check_scanlog_dump ( void  )

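check_scanlog_dump This routine checks to see if a new scanlog dump is available, and if so, copies it to the filesystem. The return value is the filename of the new scanlog dump, or NULL if one is not copied. This routine will malloc space for the returned string; it is up to the caller to free it. (NOTE(review): the prototype is declared void, so the filename is presumably delivered through the global scanlog buffer rather than a return value; confirm against the implementation.)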

This routine should be invoked once when the daemon is started.

References d_cfg, DUMP_BUF_SZ, get_machine_serial(), load_scanlog_module(), log_msg(), scanlog, SCANLOG_DUMP_EXISTS, SCANLOG_DUMP_FILE, and ppc64_diag_config::scanlog_dump_path.

Referenced by main().

void close_files ( void  )

close_files Perform any file cleanup (i.e. close()) and possibly free()'ing buffers needed by rtas_errd before exiting.

References epow_status_fd, platform_log_fd, proc_error_log_fd, and rtas_errd_log_fd.

Referenced by main().

char* diag_get_fru_pn ( struct event *  event,
char *  phyloc 
)

diag_fru_pn_by_ploc

Returns the FRU part number from VPD, as defined by the "FN" vpd keyword, for the FRU given by the physical location code

RETURNS: 0 if the information is not found, 1 if the information is found, -1 if an error occurs during the search.

References event::diag_vpd, diag_vpd::fn, get_base_loc(), get_diag_vpd(), and is_integrated().

Referenced by add_more_descrs(), and set_srn_and_callouts().

void epow_timer_handler ( int  sig,
siginfo_t  siginfo,
void *  context 
)

epow_timer_handler

Parameters
sig: unused
siginfo: unused
context: unused

References SENSOR_TOKEN_EPOW_SENSOR, time_remaining, and update_epow_status_file().

Referenced by main().

void free_diag_vpd ( struct event *  )
char* get_dt_status ( char *  )

References dbg, log_msg(), and target_status.

Referenced by process_pre_v6().

int get_error_fmt ( struct event *  event)

get_error_fmt

Returns
An integer that is the error log format indicator.

References event::event_buf, and I_FORMAT.

Referenced by process_pre_v6().

time_t get_event_date ( struct event *  event)

get_event_date

Parameters
event: the event from which to retrieve the timestamp
Returns
the timestamp as time since Epoch, or 0 on failure

References bcd_2b_toint, bcd_4b_toint, log_msg(), event::rtas_event, and event::rtas_hdr.

Referenced by process_pre_v6(), and process_v6().

void handle_hotplug_event ( struct event *  )
void handle_prrn_event ( struct event *  )
void handle_resource_dealloc ( struct event *  event)

handle_resource_dealloc Parses error information to determine if it represents a predictive CPU failure, which should cause a CPU Guard operation. DRMGR_PROGRAM is forked to actually remove the CPU from the system.

Parameters
event: rtas event

References guard_cpu(), guard_memlmb(), guard_mempage(), guard_spcpu(), log_msg(), event::rtas_event, and RTAS_V6_TYPE_RESOURCE_DEALLOC.

Referenced by handle_rtas_event().

int handle_rtas_event ( struct event *  event)
int init_files ( void  )

init_files Open the following files needed by the rtas_errd daemon: rtas_errd_log, proc_error_log, platform_log, and epow_status.

Note: This should only be called once before any rtas_events are read.

Returns
0 on success, !0 on failure

References debug, log_msg(), platform_log, platform_log_fd, proc_error_log1, proc_error_log2, proc_error_log_fd, rtas_errd_log, rtas_errd_log_fd, and update_epow_status_file().

Referenced by main().

void log_event ( struct event *  event)
void log_msg ( struct event *  event,
char *  fmt,
  ... 
)
int menugoal ( struct event *  ,
char *   
)
int platform_log_write ( char *  fmt,
  ... 
)

platform_log_write Provide a printf() style interface to write messages to platform_log. All messages are prepended with "ppc64-diag" to match the expected format of the platform log.

Parameters
fmt: format string a la printf()
...: additional args a la printf()
Returns
return code from write() call

References platform_log_fd.

Referenced by check_eeh(), check_platform_dump(), guard_cpu(), guard_memlmb(), guard_spcpu(), and handle_rtas_event().

int print_rtas_event ( struct event *  event)

print_rtas_event Prints the binary hexdump of an RTAS event to the PLATFORM_LOG file.

Parameters
event: pointer to the struct event to print
Returns
0 on success, !0 on failure

References dbg, event::event_buf, event::flags, log_msg(), platform_log, platform_log_fd, RE_SCANLOG_AVAIL, scanlog, and event::seq_num.

Referenced by handle_rtas_event().

int process_pre_v6 ( struct event *  event)

process_pre_v6

Parameters
event: the event to be parsed
Returns
always returns 0

References add_cpu_id(), analyze_io_bus_error(), bypass_errdscr, cpu610, cpu611, cpu612, cpu613, cpu614, cpu615, cpu619, cpu710, cpu711, cpu712, cpu713, cpu714, cpu715, CPUALLZERO, CPUB12b0, CPUB12b1, CPUB12b2, CPUB12b3, CPUB12b4, CPUB12b5, CPUB12b6, CPUB12b7, dbg, event::event_buf, FALSE, FIRST_LOC, fru_callout_pre_v6::floc, event_description_pre_v6::frus, get_cpu_frus(), get_dt_status(), get_error_fmt(), get_error_type(), get_event_date(), get_loc_code(), get_register_data(), I_BYTE24, I_CPU, I_IBM, I_POSTCODE, io630, io631, io632, io633, io634, io639, io730, io731, io732, io733, io734, io735, io736, io770, io771, io772, io773, io832, io_error_type, IOALLZERO, IOB12b0, IOB12b1, IOB12b2, IOB12b3, IOB12b3B13b2, IOB12b4, IOB12b5, IOB12b5B13b1, IOB12b5B13b2, IOB12b6, IOB12b6B13b1, IOB12b6B13b2, IOB12b6B13b3, IOB12b7, IOB12b7B13b1, IOB12b7B13b2, IOB13b4, IOB13b5, IOB13b6, IOB13b7, device_ela::led, event::length, event::loc_codes, log_msg(), mem624, mem625, mem626, mem627, mem628, mem629, mem722, mem723, mem724, mem725, MEMALLZERO, MEMB12b0, MEMB12b1, MEMB12b2, MEMB12b3, MEMB12b4, MEMB12b4B13b3, MEMB12b5, MEMB12b6, MEMB12b7, MEMB13b0, MEMB13b1, MEMB13b2, MEMB13b3, MEMB13b4, memtest600, MSGMENUG174, NEXT_LOC, pct_index, post, POSTALLZERO, POSTB12b0, POSTB12b1, POSTB12b2, POSTB12b3, POSTB12b4, POSTB12b5, POSTB12b6, POSTB12b7, POSTB13b0, POSTB13b1, POSTB13b2, POSTB13b3, POSTB13b4, POSTB13b5, POSTB13b7, PREDICT_REPAIR_PENDING, PREDICT_UNRECOV, process_epow(), process_refcodes(), process_v2_sp(), process_v3_logs(), RC_PLANAR, RC_PLANAR_2CPU, RC_PLANAR_CPU, event_description_pre_v6::rcode, report_menugoal(), report_srn(), event_description_pre_v6::rmsg, event::rtas_event, event::rtas_hdr, servicelog_sev(), set_fru_percentages(), set_srn_and_callouts(), event::sl_entry, event_description_pre_v6::sn, SN_V3ELA, sp640, sp641, sp642, sp643, sp644, sp740, sp741, sp742, sp743, sp744, sp745, sp746, sp747, sp748, sp749, sp750, sp751, sp752, sp753, sp760, SPB16b0, SPB16b1, SPB16b2, 
SPB16b3, SPB16b4, SPB16b5, SPB16b6, SPB16b7, SPB17b0, SPB17b1, SPB17b2, SPB17b3, SPB17b4, SPB17b5, SPB18b0, SPB18b1, SPB18b2, SPB18b3, SPB18b4, SPB18b7, SRC_REG_ID_02, TRUE, and v3_errdscr.

Referenced by handle_rtas_event().

int process_v6 ( struct event *  )
int read_proc_error_log ( char *  buf,
int  buflen 
)

read_proc_error_log Read the data in from the /proc error log file. This routine also handles the debug case of reading in a test file that contains an ascii representation of an RTAS event.

Parameters
buf: buffer to read RTAS event in to.
buflen: length of buffer parameter
Returns
number of bytes read.

References log_msg(), proc_error_log1, proc_error_log2, and proc_error_log_fd.

Referenced by read_rtas_events().

void restore_sigchld_default ( void  )

restore_sigchld_default Restores child signal handler to default action.

References log_msg().

Referenced by check_platform_dump(), get_diag_vpd(), and retrieve_drc_name().

int servicelog_sev ( int  rtas_sev)

servicelog_sev

Parameters
rtas_sev: RTAS severity to be converted
Returns
servicelog severity number, SL_SEV_INFO if unknown severity

Referenced by process_pre_v6(), and process_v6().

void setup_sigchld_handler ( void  )

setup_sigchld_handler Set up a custom child signal handler to clean up child processes when they exit.

References log_msg(), and sigchld_handler().

Referenced by check_platform_dump(), get_diag_vpd(), main(), and retrieve_drc_name().

void sighup_handler ( int  sig,
siginfo_t  siginfo,
void *  context 
)

sighup_handler The SIGHUP signal will cause the rtas_errd daemon to re-read the configuration file. If it is currently safe to re-configure ourselves we do, otherwise we set a flag to indicate that a re-configuration needs to occur at the next "safe" place.

References cfg_log(), d_cfg, diag_cfg(), ppc64_diag_config::flags, RE_CFG_RECEIVED_SIGHUP, and RE_CFG_RECFG_SAFE.

Referenced by main().

void update_epow_status_file ( int  status)

update_epow_status_file Used to write the current EPOW status (as defined in the parse_epow() function (epow.c) comment) to the epow_status file.

Parameters
status: value to update epow_status file to.

References epow_status_fd, epow_status_file, and log_msg().

Referenced by check_epow(), epow_timer_handler(), and init_files().

void update_rtas_msgs ( void  )

update_rtas_msgs Update the file /var/log/platform with any RTAS events found in syslog that have not been handled by rtas_errd.

References dbg, event::event_buf, find_rtas_end(), find_rtas_start(), get_rtas_no(), handle_rtas_event(), log_msg(), messages_log, msgs_log_fd, platform_log, platform_log_fd, RTAS_ERROR_LOG_MAX, event::rtas_event, and RTAS_START.

Referenced by main().

Variable Documentation

int debug

Referenced by _dbg(), init_files(), and main().

char* epow_status_file
char* messages_log

Used by update_rtas_msgs() to bring the platform log up to date with current RTAS events.

Referenced by main(), and update_rtas_msgs().

char* platform_log
int platform_log_fd
char* proc_error_log1
char* proc_error_log2
char* rtas_errd_log
char* rtas_errd_log0

Referenced by _log_msg().

char* scanlog

This is a buffer that is allocated and filled when rtas_errd is initially exec()'ed via check_scanlog_dump(). The buffer will contain the path to a scanlog dump and is reported with the first RTAS event we receive from the kernel.

Referenced by check_scanlog_dump(), print_rtas_event(), and read_rtas_events().

struct servicelog* slog

Referenced by log_event(), and main().

char* test_file