Script:诊断Scheduler信息

以下脚本可以用于针对10g以后的Scheduler信息:

set long 400
set pagesize 1000
set linesize 120
column log_date format a37
column operation format a8
column status format a6
column additional_info format a400
column systimestamp format a37
column next_start_date format a40
column start_date format a40
column manual_open_time format a40
column manual_duration format a40
column duration format a40
column end_date format a40
column last_start_date format a40
column window_name format a26
column systimestamp format a35
column ATTR_INTV format a37
column ATTR_TSTAMP format a37
column start_time format a35
column obj_name format a20
column name format a30
column value format a30
column job_queue_name format a18  
column job_type format 9999
column flag format 9999  
column status format a6
column "SEQUENCE#" format 9999
column id format 99999
spool wintest
select * from dba_scheduler_windows ;
select log_date, window_name, operation,status, substr(additional_info,1,350)
from dba_scheduler_window_log order by 1 ;
select window_name, substr(additional_info,1,350) x 
from dba_scheduler_window_log where additional_info is not null;
-- Report current time in scheduler format
select dbms_scheduler.stime from dual;
-- Report Internal Queues v10.2(before)
select job_queue_name,   JOB_TYPE,  
(select object_name from dba_objects where object_id = job_oid
) OBJ_NAME,  FLAG, start_time from x$jskjobq;
-- Report current open window attributes(before)
SELECT o.name, o.namespace, a.*
 FROM sys.obj$ o, sys.scheduler$_global_attribute a
  WHERE o.obj# = a.obj# AND BITAND(a.flags,1) != 1
   AND o.name = 'CURRENT_OPEN_WINDOW' AND o.namespace = 51;
-- To implement the solution, please execute the following steps::
exec dbms_scheduler.disable('WEEKEND_WINDOW');
exec dbms_scheduler.disable('WEEKNIGHT_WINDOW');
exec dbms_scheduler.set_scheduler_attribute('SCHEDULER_DISABLED', 'TRUE');
select value from v$parameter where name='job_queue_processes';
alter system set job_queue_processes=0;
exec dbms_ijob.set_enabled(FALSE);
alter system flush shared_pool;
alter system flush shared_pool;
declare
cwo number;
begin
select o.obj# into cwo from sys.obj$ o
where o.name = 'CURRENT_OPEN_WINDOW' and o.namespace = 51;
update sys.scheduler$_global_attribute set value = null, attr_tstamp = null,
attr_intv = null, additional_info = null where obj# = cwo;
end;
/
exec dbms_ijob.set_enabled(TRUE);
exec dbms_scheduler.enable('WEEKEND_WINDOW');
exec dbms_scheduler.enable('WEEKNIGHT_WINDOW');
exec dbms_scheduler.set_scheduler_attribute('SCHEDULER_DISABLED', 'FALSE');
exec dbms_ijob.set_enabled(TRUE);
-- Report current time in scheduler format
select dbms_scheduler.stime from dual;
-- Report Internal Queues v10.2(after)
select job_queue_name, JOB_TYPE,
(select object_name from dba_objects where object_id = job_oid
) OBJ_NAME, FLAG, start_time from x$jskjobq;
-- Report current open window attributes(after)
SELECT o.name, o.namespace, a.*
FROM sys.obj$ o, sys.scheduler$_global_attribute a
WHERE o.obj# = a.obj# AND BITAND(a.flags,1) != 1
AND o.name = 'CURRENT_OPEN_WINDOW' AND o.namespace = 51;
ACCEPT J_NUM NUMBER PROMPT "Enter job_queue_processes: "
prompt Setting job_queue_processes=&&J_NUM
alter system set job_queue_processes=&&J_NUM;
select value from v$parameter where name='job_queue_processes';
spool off

Advanced Diagnostic using oradebug dumpvar

oradebug工具是Oracle数据库调优和诊断的利器,合理运用oradebug可以大幅减少我们收集诊断信息所花费的时间。当然前提是合理运用,对于初级DBA有这样一个忠告,不要在生产环境中去接触或修改自己所不熟悉的领域的东西,这一点很重要。并不是说中高级DBA在知识和经验上能达到巨细靡遗的状态,实际上中高级Oracle DBA可能每天都在和新事物打交道。恰恰相反,中高级DBA是对以上忠告最忠实的践行者,在生产环境中绝不随意涉险于不确定、不明确、不熟悉的区域,很多时候这会让人觉得是一种”好奇心丧失”的表现。

言归正传,我们所要介绍的是oradebug的dumpvar命令,该命令用于打印转储固定的PGA/UGA/SGA的变量:,其语法如下:

oradebug dumpvar
Print/dump a fixed PGA/SGA/UGA variable.
Syntax                                                              Parameter
oradebug dumpvar <p|s|uga> <variable name> [level]                  <p|s|uga> PGA,SGA or UGA
fixed variable name
[level]
使用示例
SQL> select * from v$version;
BANNER
----------------------------------------------------------------
Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi
PL/SQL Release 10.2.0.4.0 - Production
CORE    10.2.0.4.0      Production
TNS for Linux: Version 10.2.0.4.0 - Production
NLSRTL Version 10.2.0.4.0 - Production
SQL> select * from global_name;
GLOBAL_NAME
--------------------------------------------------------------------------------
www.askmaclean.com
SQL> show parameter db_files
NAME                                 TYPE        VALUE
------------------------------------ ----------- ------------------------------
db_files                             integer     200
SQL> oradebug setmypid;
Statement processed.
SQL> oradebug dumpvar sga kcfdpk
kfil kcfdpk_ [698B950, 698B954) = 000000C8
这里的KCFDPK变量保存了db_files参数的值,000000C8 == 200
利用oradebug工具我们不仅可以做到对这些内部变量的窥视,还可以做到修改,
但是这对我们实际的诊断没有益处,仅仅可以用来满足某些模拟实验

仅仅了解dumpvar命令的使用方法并没有意义,只有和有意义的内部变量结合起来才能真正起到高级诊断的作用,在这里抛砖引玉罗列出部分诊断变量:

sgauso --  该变量可以用于判断use_stored_outlines
SQL> oradebug setmypid;
Statement processed.
Default use_stored_outlines=false;
SQL> oradebug dumpvar sga sgauso
qolprm sgauso_ [060021418, 06002143C) =
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
SQL> alter system set use_stored_outlines=true;
System altered.
SQL> oradebug dumpvar sga sgauso
qolprm sgauso_ [060021418, 06002143C) =
00000001 45440007 4C554146 00000054 00000000 00000000 00000000 00000000 00000000
45440007 4C554146 ==>      DEFAULT,意为优化器使用DEFAULT category中存放的outline.
ksmvpa -- the size of the variable part of the pga
SQL> oradebug dumpvar pga ksmvpa
b4 ksmvpa_ [0068AA6B4, 0068AA6B8) = 0000E920          -- 59680 bytes
kkjsre        -- The SGA variable kkjsre must be 1 for jobs to execute automatically.
SQL> oradebug  dumpvar sga kkjsre
word kkjsre_ [060025760, 060025764) = 00000001
SQL> exec  dbms_ijob.set_enabled(false);
PL/SQL procedure successfully completed.
SQL> oradebug  dumpvar sga kkjsre
word kkjsre_ [060025760, 060025764) = 00000000

kcmsmx          --the MAX reasonable scn 
SQL> oradebug dumpvar pga kcmsmx
kscn kcmsmx_ [00B7E811C, 00B7E8124) = CA7F0000 00000C6C         -- 11.2.0.2
SQL> oradebug dumpvar pga kcmsmx
kscn kcmsmx_ [0068AB02C, 0068AB034) = A701C000 00000B3D         -- 10.2.0.4 
Notice the MAX scn allowed on 11.2 is far higher that that at earlier releases
(due to the fix from bug 9254170).
Use the fix for 9254170 on ALL DBs with an SCN rate set to match between versions.
Avoid excessive SCN generation rates.
kcvlcm - logging checkpoints to alert - FALSE
SQL> show parameter log_checkpoints_to_alert
NAME                                 TYPE        VALUE
------------------------------------ ----------- ------------------------------
log_checkpoints_to_alert             boolean     TRUE
SQL> oradebug dumpvar sga kcvlcm
word kcvlcm_ [060019E88, 060019E8C) = 00000001
SQL> alter system set log_checkpoints_to_alert=false;
System altered.
SQL> oradebug dumpvar sga kcvlcm
word kcvlcm_ [060019E88, 060019E8C) = 00000000
kcvcpr - false (a checkpoint is requested)
SQL> oradebug dumpvar sga  kcvcpr
word kcvcpr_ [060019E90, 060019E94) = 00000000
kcvcpf - false (checkpointing fast)
kcvgcw -false a global checkpointing is waiting
SQL> oradebug dumpvar sga kcvgcw
sword kcvgcw_ [060025748, 06002574C) = 00000000

kcfcpd - true if checkpoint writes done
kcvblg  kcvblg[0] is s 1 incidate some file/s in backup mode
SQL> oradebug dumpvar sga kcvblg
ub4 * kcvblg_ [060019E18, 060019E20) = 9A248508 00000000
SQL> oradebug peek 0x9A248508 100
[09A248508, 09A24856C) =
00000001 00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000002 00000000
00000002 00000002 00000000 00000000 ...
alter database begin backup;
SQL>  oradebug peek 0x9A248508 100
[09A248508, 09A24856C) =
00000001 00000003 00000003 00000003 00000003
00000003 00000003 00000003 00000003 00000003
00000003 00000003 00000003 00000003 ...
kcsgscn_           -- current scn 
SQL> oradebug dumpvar sga kcsgscn_
kcslf kcsgscn_ [0600122B0, 0600122E0) =
01C7DB4B 00000000 00000000 00000000              01C7DB4B  -- 29875019
0001E907 00000000 00000000 00000000
00000000 00000000 60011F90 00000000              60011F90 is fixed
SQL> select current_scn from v$database;
CURRENT_SCN
-----------
29875044

kslwlst_  --  waiter list latch
SQL> oradebug dumpvar sga kslwlst
ksllt kslwlst_ [200040AC, 20004174) =
00000000 00000009 00000000 00000000 00000000
00000000 00000000 00000000 00000000 00000000
00000000 00000000 00000000 ...
kcfdfk -- 2 * db_files

SQL> oradebug dumpvar sga kcfdfk
kfil kcfdfk_ [060017EF0, 060017EF4) = 00000190            --400    when db_files=200 
SQL> alter system set db_files=2000 scope=spfile;
System altered.
SQL> oradebug dumpvar sga kcfdfk
kfil kcfdfk_ [060017EF0, 060017EF4) = 00000FA0             --4000
kcffsz  Address of Datafile Size 
So kcbzib() passes in information about how many blocks to read
( and where to start reading from ) The assertion is 
if (bno < 2 || bno + nblks - 1 > (kcflsz[fno] ? kcflsz[fno] : kcffsz[fno]))
==============================================================
.......NOTE: This section is removed after 9g..................
{
/* if the user gave us the block number then this is an external error,
* but if the dba was internally generated then this is a corruption of
* some kind. */
ASSERTNM5(usrdba, OERINM("kcfrbd_2"),
fno, bno, nblks, kcflsz[fno], kcffsz[fno]);
SQL> oradebug dumpvar sga kcffsz
ub4 * kcffsz_ [060017F20, 060017F28) = 9988E700 00000000
SQL> oradebug peek 0x9988E700 200
[09988E700, 09988E7C8) =
00000001
0007B200
00030980
0000AF00
00067CA0
00003200
00280000
00000500
00000000
00000A00
00000000
00000000
00000600
00040000 ...
SQL> select blocks,to_char(blocks,'XXXXXXXX') hex_blocks from v$datafile;
BLOCKS HEX_BLOCK
---------- ---------
504320     7B200
199040     30980
44800      AF00
425120     67CA0
12800      3200
2621440    280000
1280       500
0         0
2560       A00
38400      9600
6400      1900
BLOCKS HEX_BLOCK
---------- ---------
1536       600
262144     40000
13 rows selected.
kcflsz -- like kcffsz

SQL> oradebug dumpvar  sga kcflsz
ub4 * kcflsz_ [060017F28, 060017F30) = 99892588 00000000
SQL> oradebug peek 0x99892588 200
[099892588, 099892650) =
00000000 0007B200 00030980 0000AF00 00067CA0
00003200 00280000 00000500 00000000 00000A00
00000000 00000000 00000600 00040000 ...
THe arguments are file number, block number, number of blocks, kcflsz[fno],
kcffsz[fno]
Error comes out of kcf.c.
Can they try a validate structure cascade on this table and see what happens?
Please update the customer field so that it does not read internal.

kcf.c kcf.c Kernel Cache Files component.
ksl.c Kernel Service layer Latching & Wait-post Implement.
ksm.c Kernel Service Memory component implementation.
kkj.c Kernel Kompiletime Job queue.

Script:收集ASM诊断信息

收集ASM诊断信息最佳的工具仍是RDA,对于不能使用RDA的环境可以采用如下脚本:

 

spool asm_diag1.txt
set pagesize 1000
set lines 500
col "Group Name"   form a25
col "Disk Name"    form a30
col "State"  form a15
col "Type"   form a7
col "Free GB"   form 9,999
alter session set nls_date_format='DD-MON-YYYY HH24:MI:SS';
select sysdate "Date and Time" from dual;
select * from v$asm_diskgroup order by 1;
select * from v$asm_disk order by 1, 2, 3;
select * from gv$asm_operation order by 1;
select * from v$version where banner like '%Database%' order by 1;
select * from gv$asm_client order by 1;
prompt
prompt ASM Disk Groups
prompt ===============
select group_number  "Group"
,      name          "Group Name"
,      state         "State"
,      type          "Type"
,      total_mb/1024 "Total GB"
,      free_mb/1024  "Free GB"
from   v$asm_diskgroup
/
prompt
prompt ASM Disks
prompt ==============
col "Group"          form 999
col "Disk"           form 999
col "Header"         form a9
col "Mode"           form a8
col "Redundancy"     form a10
col "Failure Group"  form a10
col "Path"           form a19
select group_number  "Group"
,      disk_number   "Disk"
,      header_status "Header"
,      mode_status   "Mode"
,      state         "State"
,      redundancy    "Redundancy"
,      total_mb      "Total MB"
,      free_mb       "Free MB"
,      name          "Disk Name"
,      failgroup     "Failure Group"
,      path          "Path"
from   v$asm_disk
order by group_number
,        disk_number
/
prompt
prompt Instances currently accessing these diskgroups
prompt ==============================================
select c.group_number  "Group"
,      g.name          "Group Name"
,      c.instance_name "Instance"
from   v$asm_client c
,      v$asm_diskgroup g
where  g.group_number=c.group_number
/
prompt
prompt Report the Percentage of Imbalance in all Mounted Diskgroups
prompt ==============================================
select dfail, count(dfail) from
(
select disk, count(failgroup) as dfail
from x$kfdpartner, v$asm_disk where
number_kfdpartner=disk_number and grp=group_number
group by disk, failgroup
)
group by dfail; 
select g.name as "GROUP", d.name as "DISK", d.failgroup, fcnt, pcnt,
decode(pcnt - fcnt, 0, 'MUST', 'SHOULD') as action from
(select gnum, DISK1, failgroup, count(failgroup) as fcnt from
(select gnum, DISK1
from
(
select d.group_number as gnum, disk as disk1,
count(distinct failgroup) as dfail
from x$kfdpartner, v$asm_disk_stat d where
number_kfdpartner=disk_number and grp=d.group_number
and active_kfdpartner=1
group by d.group_number, disk
), v$asm_disk_stat
where dfail < 3
and disk1=disk_number
and gnum=group_number),
x$kfdpartner, v$asm_disk_stat d where
number_kfdpartner=disk_number and grp=d.group_number and grp=gnum
and disk1=disk
and active_kfdpartner=1
group by gnum, disk1, failgroup),
(select grp, disk, count(disk) as pcnt from x$kfdpartner where
active_kfdpartner=1 group by grp, disk),
v$asm_diskgroup_stat g, v$asm_disk_stat d
where gnum=grp and gnum=g.group_number and gnum=d.group_number and
disk=disk1 and disk=disk_number and
((fcnt = 1 and (pcnt - fcnt) > 3) or ((pcnt - fcnt) = 0))
/
col TYPE form a15
col FILE_NUMBER form 9999 head FILE_NUM
col GROUP_NUMBER form 9999 head GR_NUM
col GB for 9999.99
select GROUP_NUMBER   ,
FILE_NUMBER          ,
COMPOUND_INDEX       ,
INCARNATION          ,
BLOCK_SIZE           ,
BLOCKS               ,
BYTES/1024/1024/1024 GB ,
TYPE                 ,
STRIPED              ,
CREATION_DATE        ,
MODIFICATION_DATE
from v$asm_file
where TYPE != 'ARCHIVELOG'
/
prompt
prompt free ASM disks and their paths
prompt ===========================
select header_status , mode_status, path from V$asm_disk
where header_status in ('FORMER','CANDIDATE')
/
show parameter asm
show parameter size
show parameter proc
show parameter cluster
show parameter instance_type
show parameter instance_name
show parameter pfile
show sga
spool off

 

Code to be run on the ASM instance. Use file asmdebug.sql

 

set newpage none
set linesize 100
spool /tmp/asmdebug.out
--
-- Get a timestamp
select rpad('>', 10, '>'), to_char(sysdate, 'MON DD HH24:MM:SS') from dual;
--
-- Diskgroup information
set head off
select 'Diskgroup Information' from dual;
set head on
column name format a15
column DG# format 99
select group_number DG#, name, state, type, total_mb, free_mb from
v$asm_diskgroup;
--
-- Get the # of Allocation Units  per DG
set head off
select 'Number of AUs per diskgroup' from dual;
set head on
select count(number_kfdat) AU_count, group_kfdat DG# from x$kfdat
group by group_kfdat;
--
-- Get the # of Allocation Units  per DiskGroup and Disk
set head off
select 'Number of AUs per Diskgroup,Disk' from dual;
col "group#,disk#" for a30
set head on
select count(*)AU_count, GROUP_KFDAT||','||number_kfdat "group#,disk#" from x$kfdat group by GROUP_KFDAT,number_kfdat;
--
-- Get the # of allocated (V) and free (F) Allocation Units
set head off
select 'Number of allocated (V) and free (F) Allocation Units' from dual;
col "VF" for a2
set head on
select GROUP_KFDAT "group#", number_kfdat "disk#", v_kfdat "VF", count(*)
from x$kfdat
group by GROUP_KFDAT, number_kfdat, v_kfdat;
--
-- Get the # of Allocation Units per ASM file
set head off
select 'Number of AUs per ASM file ordered by AU count for metadata only'
from dual;
set head on
select count(XNUM_KFFXP) AU_count,  NUMBER_KFFXP file#, GROUP_KFFXP DG# from x$kffxp where NUMBER_KFFXP < 256
group by NUMBER_KFFXP, GROUP_KFFXP
order by count(XNUM_KFFXP) ;
--
-- Get the # of Allocation Units per ASM file by file alias.  Change the
-- system_created Y|N depending if you want the short or long ASM name
set head off
select 'Number of AUs per ASM file ordered by AU count.  This is for non
metadata' from dual;
col name format a60
set head on
select GROUP_KFFXP, NUMBER_KFFXP, name, count(*)
from x$kffxp, v$asm_alias
where GROUP_KFFXP=GROUP_NUMBER and NUMBER_KFFXP=FILE_NUMBER and
system_created='Y'
     group by GROUP_KFFXP, NUMBER_KFFXP, name
     order by GROUP_KFFXP, NUMBER_KFFXP;
--
-- Get partner information.  This is really only useful if redundancy is other than
-- external.
set head off
select 'The following shows the disk to partner relationship.  This is really only
useful if using normal or high redundancy.' from dual;
set head on
select grp DG#, disk, NUMBER_KFDPARTNER partner, PARITY_KFDPARTNER parity, ACTIVE_KFDPARTNER active
from x$kfdpartner;
--
-- Another look at file utilization.
set head off
set linesize 132
select 'bytes is the sum of AUs with data in them * 1024^2
space is the sum of all AUs allocated for this file * 1024^2'
from dual;
set head on
col Name format a60
select f.group_number, f.file_number, bytes, space, space/(1024*1024) "InMB", a.name "Name"
from v$asm_file f, v$asm_alias a
where f.group_number=a.group_number and f.file_number=a.file_number
    and system_created='Y'
    order by f.group_number, f.file_number;
--
-- Get robust disk information
set linesize 400
col failgroup format a20
col label format a20
col name format a40
col path format a40
set head off
select 'Robust disk information' from dual;
set head on
select GROUP_NUMBER, DISK_NUMBER, INCARNATION, MOUNT_STATUS, HEADER_STATUS, MODE_STATUS, STATE, LIBRARY, TOTAL_MB, FREE_MB,
NAME, FAILGROUP, LABEL, PATH, CREATE_DATE, MOUNT_DATE, READS,
WRITES, READ_ERRS, WRITE_ERRS, READ_TIME, WRITE_TIME, BYTES_READ, BYTES_WRITTEN
from v$asm_disk;
--
spool off

 

 

Code to be executed on the database instances using this ASM instance. Use file rdbmsdebug.sql

 

set newpage none
spool /tmp/rdbmsdebug.out
--
-- Get a timestamp
select rpad('>', 10, '>'), to_char(sysdate, 'MON DD HH24:MM:SS') from dual;
--
-- Get datafile information as the database sees it
set head off
select 'V$DATAFILE information' from dual;
set head on
set linesize 132
col name format a60
select file#, name, block_size, blocks, bytes, bytes/(1024*1024) "InMB",
status from v$datafile;
--
-- Get controlfile information as the database sees it
set head off
select 'V$CONTROLFILE information' from dual;
set head on
select * from v$controlfile;
--
-- Get archivelog information as the database sees it
set head off
select 'GV$ARCHIVED_LOG information' from dual;
set head on
select name, thread#, sequence#, blocks*block_size "size", status
status from gv$archived_log
order by thread#,sequence#;
--
-- Get redolog information as the database sees it
set head off
select 'v$log and v$logfile information' from dual;
set head on
col member format a60
select a.group#, member, thread#, sequence#, bytes, a.status
from v$log a, v$logfile b where a.group# = b.group#
order by thread#;
--
-- Get tempfiles information as the database sees it
set head off
select 'GV$TEMPFILE information' from dual;
set head on
col name format a60
select INST_ID,TS#,FILE#, RFILE#,NAME, CREATION_CHANGE# , CREATION_TIME,  STATUS, BYTES, BLOCKS, CREATE_BYTES, BLOCK_SIZE
from gv$tempfile order by inst_id, file#;
spool off

 

Other items to collect:

System error logs
Alert logs from all database and ASM instances
All recent trace files from all databases involved and all of the ASM instances
All “.trc” files from the ASM instances

 

check ASM disk space

 

1) Determine which (if any) disks contain no free space (ie are below the threshold)
select group_kfdat "group #",
number_kfdat "disk #",
count(*) "# AU's"
from x$kfdat a
where v_kfdat = 'V'
and not exists (select *
from x$kfdat b
where a.group_kfdat = b.group_kfdat
and a.number_kfdat = b.number_kfdat
and b.v_kfdat = 'F')
group by GROUP_KFDAT, number_kfdat;
If no rows are returned ... the following query can also be used
select disk_number "Disk #", free_mb
from v$asm_disk
where group_number = *** disk group number ***
order by 2;
If rows are returned from the first query ... or FREE_MB is less than 100mb in the second ... then there is probably insufficient disk space to allow a rebalance to occur ... Note the Disk #'s for later
2) Determine which files have allocation units on the disk(s) that are on exhausted disks
select name, file_number
from v$asm_alias
where group_number in (select group_kffxp
from x$kffxp
where group_kffxp=*** disk group number ***
and disk_kffxp in (*** disk list from #1 above ***)
and au_kffxp != 4294967294
and number_kffxp >= 256)
and file_number in (select number_kffxp
from x$kffxp
where group_kffxp=*** disk group number ***
and disk_kffxp in (*** disk list from #1 above ***)
and au_kffxp != 4294967294
and number_kffxp >= 256)
and system_created='Y';
3) Free up space so that the rebalance can occur
Using the file list from #2 above ... we will need to either drop or move tablespace(s)/datafile(s) such that all disks that are exhausted have at least 100mb free ...
NOTE ... the AU count above ... should relate to 1mb AU size ... so if a single file ... with at least 100 au's can be dropped or moved ... this
should be sufficient to free up enough space to allow the rebalance to occur
Droppable tablespaces may be things like:
* temporary tablespaces
* index tablespaces (assuming you know how to rebuild the indexes)
If none of the tablespaces are droppable then the tablespace(s)/datafile(s) will need to be
* moved to another diskgroup (at least temporarily) ...
* dropped using RMAN (with the database shutdown) and will be restored later
Note 330103.1  How to Move Asm Database Files From one Diskgroup To Another ?
4) Check to see if there is sufficient FREE_MB on the problem disks
select disk_number "Disk #", free_mb
from v$asm_disk
where disk_group = *** disk group number ***
and disk_number in (*** disk list from #1 above ***)
order by 2;

 

Check Diskgroup Balance

If you want to perform an imbalance check for all mounted diskgroups, run the script in MOS Note 367445.1.

If you want to determine if you already have imbalance for a file in an existing diskgroup, use the following query:

select disk_kffxp, sum(size_kffxp) from x$kffxp where group_kffxp=AAA and number_kffxp=BBB and lxn_kffxp=0 group by disk_kffxp order by 2;

Breakdown of input/output is as follows:

  • AAA is the group_number in v$asm_alias
  • BBB is file_number in v$asm_alias
  • disk_kffxp gives us the disk number.
  • size_kffxp is used such that we account for variable sized extents.
  • sum(size_kffxp) provides the number of AUs that are on that disk.
  • lxn_kffxp is used in the query such that we go after only the primary extents, not secondary extents

If you want to check balance from an IO perspective, query the statistics in v$asm_disk_iostat before and after running a large SQL statement. For example, if the running a large query that does just reads, the reads and read_bytes columns should be roughly the same for all disks in the diskgroup.

Script:收集Oracle备份恢复信息

我们在诊断Oracle backup restore问题时总是希望能获得足够的诊断信息,一般来说RDA会是一个最好的诊断信息收集工具,但是有时候客户会很反感使用RDA(不信任感),这里我们提供一段专门用来收集oracle备份恢复信息的脚本。

运行以下脚本需要设置合理的”ORACLE_HOME、ORACLE_SID”环境变量,并设置NLS_DATE_FORMAT环境变量,如

NLS_DATE_FORMAT="DD-MON-RRRR HH24:MI:SS"
export NLS_DATE_FORMAT

以”rman target /”登陆并运行:

spool log to rman_report.log
set echo on
show all;
report schema;
list incarnation;
list backup summary;
list backup;
list copy;
report need backup;
report obsolete;
restore database preview;
spool log off

以下脚本在sqlplus中以sysdba身份执行,执行要求数据库至少处于mounted已加载状态下;注意该原始脚本是只读readonly的,它仅仅是读取数据字典,不会造成危害,当然请确保你的脚本来源!!

spool results01.txt
set echo on feedback on time on timing on pagesize 100 linesize 80 numwidth 13
show user
alter session set nls_date_format = 'DD-MON-YYYY HH24:MI:SS';
select * from v$version;
select to_char(sysdate, 'DD-MON-YYYY HH24:MI:SS') as current_date from dual;
column name format a30
column value format a49
select name, value from v$parameter where isdefault='FALSE' order by 1;
column parameter format a30
column value format a49
select * from v$nls_parameters order by parameter;
column name format a10
select dbid, name,
to_char(created, 'DD-MON-YYYY HH24:MI:SS') created,
open_mode, log_mode,
to_char(checkpoint_change#, '999999999999999') as checkpoint_change#,
controlfile_type,
to_char(controlfile_change#, '999999999999999') as controlfile_change#,
to_char(controlfile_time, 'DD-MON-YYYY HH24:MI:SS') controlfile_time,
to_char(resetlogs_change#, '999999999999999') as resetlogs_change#,
to_char(resetlogs_time, 'DD-MON-YYYY HH24:MI:SS') resetlogs_time
from v$database;
select * from v$instance;
archive log list;
select * from v$thread order by thread#;
select * from v$log order by first_change#;
column member format a45
select * from v$logfile;
column name format a79
select '#' || ts.name || '#' as tablespace_name, ts.ts#,
'#' || df.name || '#' as filename, df.file#, df.status, df.enabled, df.creation_change#,
to_char(df.creation_time, 'DD-MON-YYYY HH24:MI:SS') as creation_time,
to_char(df.checkpoint_change#, '999999999999999') as checkpoint_change#,
to_char(df.checkpoint_time, 'DD-MON-YYYY HH24:MI:SS') as checkpoint_time,
to_char(df.offline_change#, '999999999999999') as offline_change#,
to_char(df.online_change#, '999999999999999') as online_change#,
to_char(df.online_time, 'DD-MON-YYYY HH24:MI:SS') as online_time,
to_char(df.unrecoverable_change#, '999999999999999') as online_change#,
to_char(df.unrecoverable_time, 'DD-MON-YYYY HH24:MI:SS') as online_time,
to_char(df.bytes, '9,999,999,999,990') as bytes, block_size
from v$datafile df, v$tablespace ts
where ts.ts# = df.ts#
and ( df.status <> 'ONLINE'
or    df.checkpoint_change# <> (select checkpoint_change# from v$database) );
select '#' || ts.name || '#' as tablespace_name, ts.ts#,
'#' || dh.name || '#' as filename, dh.file#, dh.status, dh.error, dh.
fuzzy, dh.creation_change#,
to_char(dh.creation_time, 'DD-MON-YYYY HH24:MI:SS') as creation_time,
to_char(dh.checkpoint_change#, '999999999999999') as checkpoint_change#,
to_char(dh.checkpoint_time, 'DD-MON-YYYY HH24:MI:SS') as checkpoint_time,
to_char(dh.resetlogs_change#, '999999999999999') as resetlogs_change#,
to_char(dh.resetlogs_time, 'DD-MON-YYYY HH24:MI:SS') as resetlogs_time,
to_char(dh.bytes, '9,999,999,999,990') as bytes
from v$datafile_header dh, v$tablespace ts
where ts.ts# = dh.ts#
and ( dh.status <> 'ONLINE'
or    dh.checkpoint_change# <> (select checkpoint_change# from v$database) );
select * from v$tempfile;
select HXFIL File_num,substr(HXFNM,1,60) file_name, FHTNM tablespace_name,
FHTYP type, HXERR validity,
FHSCN SCN, FHTIM SCN_Time, FHSTA status,
FHTHR Thread, FHRBA_SEQ Sequence
from X$KCVFH
--where HXERR > 0
order by HXERR, FHSTA, FHSCN, HXFIL;
column error format a15
select error, fuzzy, status, checkpoint_change#,
to_char(checkpoint_time, 'DD-MON-YYYY HH24:MI:SS') as checkpoint_time,
count(*)
from v$datafile_header
group by error, fuzzy, status, checkpoint_change#, checkpoint_time
order by checkpoint_change#, checkpoint_time;
select * from V$INSTANCE_RECOVERY;
select * from v$recover_file order by change#;
select * from dba_tablespaces where status <> 'ONLINE';
SELECT * FROM database_properties order by property_name;
select *
from X$KCCLH, (select min(checkpoint_change#) df_min_scn,
min(checkpoint_change#) df_max_scn
from v$datafile_header
where status='ONLINE') df
where LHLOS in (select first_change# from v$log)
or df.df_min_scn between LHLOS and LHNXS
or df.df_max_scn between LHLOS and LHNXS;
select * from v$backup where status <> 'NOT ACTIVE';
select ADDR, XIDUSN, XIDSLOT, XIDSQN,
UBAFIL, UBABLK, UBASQN,
START_UBAFIL, START_UBABLK, START_UBASQN,
USED_UBLK, STATUS
from   v$transaction;
select * from v$archive_gap;
select * from v$archive_dest_status where recovery_mode <> 'IDLE';
column USED_GB format 999,990.999
column USED% format 990.99
column RECLAIM_GB format 999,990.999
column RECLAIMABLE% format 990.99
column LIMIT_GB format 999,990.999
select frau.file_type as type,
frau.percent_space_used/100 * rfd.space_limit /1024/1024/1024 "USED_GB",
frau.percent_space_used "USED%",
frau.percent_space_reclaimable "RECLAIMABLE%",
frau.percent_space_reclaimable/100 * rfd.space_limit /1024/1024/1024 "RECLAIM_GB",
frau.number_of_files "FILES#"
from   v$flash_recovery_area_usage frau,
v$recovery_file_dest rfd
order by file_type;
select name,
space_limit/1024/1024/1024 "LIMIT_GB",
space_used/1024/1024/1024 "USED_GB",
space_used/space_limit*100 "USED%",
space_reclaimable/1024/1024/1024 "RECLAIM_GB",
number_of_files "FILE#"
from   v$recovery_file_dest;
select * from v$backup_corruption;
select * from v$copy_corruption order by file#, block#;
select * from v$database_block_corruption order by file#, block#;
SELECT f.file#, f.name,
e.tablespace_name, e.segment_type, e.owner, e.segment_name,
c.file#, c.block#, c.blocks, c.corruption_change#, c.corruption_type
FROM dba_extents e, V$database_block_corruption c, v$datafile f
WHERE c.file# = f.file#
and   e.file_id = c.file#
and   c.block# between e.block_id AND e.block_id + e.blocks - 1;
select * from v$database_incarnation;
select * from v$rman_configuration;
select s.recid as bs_key, p.recid as bp_key, p.status, p.tag, p.device_type,
p.handle, p.media, p.completion_time, p.bytes
from   v$backup_piece p, v$backup_set s
where  p.set_stamp = s.set_stamp
and    s.controlfile_included='YES'
order by p.completion_time;
select s.recid as bs_key, p.recid as bp_key, p.status, p.tag, p.device_type,
p.handle, p.media, p.completion_time, f.absolute_fuzzy_change#, p.bytes
from   v$backup_datafile f, v$backup_piece p, v$backup_set s
where  p.set_stamp = s.set_stamp
and    f.set_stamp = s.set_stamp
and    p.handle is not null
and    f.file# = 1
order by p.completion_time;
SELECT
session_recid,
input_bytes_per_sec_display,
output_bytes_per_sec_display,
time_taken_display,
end_time
FROM v$rman_backup_job_details
ORDER BY end_time;
select * from v$filestat;
column EBS_MB format 9,990.99
column TOTAL_MB format 999,990.99
select SID, SERIAL, FILENAME, EFFECTIVE_BYTES_PER_SECOND/1024/1024 as EBS_MB,
OPEN_TIME, CLOSE_TIME, ELAPSED_TIME, TOTAL_BYTES/1024/1024 as TOTAL_MB,
STATUS, MAXOPENFILES, buffer_size, buffer_count
from v$backup_async_io
where close_time >= sysdate-3
order by close_time;
select SID, SERIAL, FILENAME, EFFECTIVE_BYTES_PER_SECOND/1024/1024 as EBS_MB,
OPEN_TIME, CLOSE_TIME, ELAPSED_TIME, TOTAL_BYTES/1024/1024 as TOTAL_MB,
STATUS, MAXOPENFILES, buffer_size, buffer_count
from v$backup_sync_io
where close_time >= sysdate-3;
select * from v$controlfile_record_section order by type;
select to_char(rownum) || '. ' || output rman_output from v$rman_output;
select * from v$rman_status where trunc(end_time) > trunc(sysdate)-3;
select protection_mode, protection_level from v$database;
select * from v$recovery_progress;
select s.client_info,
sl.message,
sl.sid, sl.serial#, p.spid,
round(sl.sofar/sl.totalwork*100,2) "% Complete"
from   v$session_longops sl, v$session s, v$process p
where  p.addr = s.paddr
and    sl.sid=s.sid
and    sl.serial#=s.serial#
and    opname LIKE 'RMAN%'
and    opname NOT LIKE '%aggregate%'
and    totalwork != 0
and    sofar <> totalwork;
select AL.*,
DF.min_checkpoint_change#, DF.min_checkpoint_time
from v$archived_log AL,
(select min(checkpoint_change#) min_checkpoint_change#,
min(checkpoint_time) min_checkpoint_time
from v$datafile_header
where status='ONLINE') DF
where DF.min_checkpoint_change# between AL.first_change# and AL.next_change#
order by AL.first_change#;
select * from v$asm_diskgroup;
select * from v$asm_disk;
select * from v$flashback_database_log;
select * from v$flashback_database_logfile order by first_change# desc;
select * from v$flashback_database_stat order by begin_time desc;
select * from v$restore_point;
select * from v$rollname;
select * from v$undostat;
select * from dba_rollback_segs;
spool off

Script:收集UNDO诊断信息

以下脚本可以用于收集Automatic Undo Management的必要诊断信息,以sysdba身份运行:

spool Undo_Diag.out  
ttitle off
set pages 999
set lines 150
set verify off 
set termout off
set trimout on
set trimspool on
REM   
REM ------------------------------------------------------------------------  
REM   
REM  -----------------------------------------------------------------  
REM  
set space 2  
REM  REPORTING TABLESPACE INFORMATION: 
REM   
REM  This looks at Tablespace Sizing - Total bytes and free bytes  
REM   
column tablespace_name  format a30            heading 'TS Name'  
column sbytes           format 9,999,999,999  heading 'Total MBytes'  
column fbytes           format 9,999,999,999  heading 'Free MBytes'  
column file_name        format a30            heading 'File Name'
column kount            format 999            heading 'Ext'  
compute sum of fbytes on tablespace_name  
compute sum of sbytes on tablespace_name  
compute sum of sbytes on report  
compute sum of fbytes on report  
break on tablespace_name skip 2  
select a.tablespace_name,  a.file_name,  round(a.bytes/1024/1024,0) sbytes,  
round(sum(b.bytes/1024/1024),0) fbytes,  count(*) kount, autoextensible  
from   dba_data_files a,  dba_free_space b  
where  a.file_id  =  b.file_id  
and a.tablespace_name in (select z.tablespace_name from dba_tablespaces z where retention like '%GUARANTEE')
group  by a.tablespace_name, a.file_name, a.bytes, autoextensible
order  by a.tablespace_name  
/  
set linesize 160  
REM   
REM  If you can significantly reduce physical reads by adding incremental  
REM  data buffers...do it.  To determine whether adding data buffers will  
REM  help, set db_block_lru_statistics = TRUE and  
REM  db_block_lru_extended_statistics = TRUE in the init.ora parameters.  
REM  You can determine how many extra hits you would get from memory as  
REM  opposed to physical I/O from disk.  **NOTE:  Turning these on will  
REM  impact performance.  One shift of statistics gathering should be enough  
REM  to get the required information.  
REM   
REM   
REM  -----------------------------------------------------------------  
REM
set lines 160
col tablespace_name format a30 heading "Tablespace"
col tb format a15 heading "TB Status"
col df format a10 heading "DF Status"
col extent_management format a15 heading "Extent|Management"
col allocation_type format a8 heading "Type"
col segment_space_management format a7 heading "Auto|Segment"
col retention format a11 heading "Retention|Level"
col autoextensible format a5 heading "Auto?"
col mx format 999,999,999 heading "Max Allowed"
select t.tablespace_name, t.status tb, d.status df,
extent_management, allocation_type, segment_space_management, retention,
autoextensible, (maxbytes/1024/1024) mx
from dba_tablespaces t, dba_data_files d
where t.tablespace_name = d.tablespace_name
and retention like '%GUARANTEE'
/
col status format a20 head "Status"
col cnt format 999,999,999 head "How Many?"
select status, count(*) cnt
from dba_rollback_segs
group by status
/
set termout on
set trimout off
set trimspool off
set lines 120
set pages 999
set termout off
set trimout on
set trimspool on
alter session set nls_date_format='dd-Mon-yyyy hh24:mi';
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
prompt 
prompt  ############## DATAFILES ############## 
prompt 
col retention head "Retention"
col tablespace_name format a30 head "TBSP Name"
col file_id format 999 head "File #"
col a format 999,999,999,999,999 head "Bytes Alloc (MB)"
col b format 999,999,999,999,999 head "Max Bytes Used (MB)"
col autoextensible head "Auto|Ext"
col extent_management head "Ext Mngmnt"
col allocation_type head "Type"
col segment_space_management head "SSM"
select tablespace_name, file_id, sum(bytes)/1024/1024 a, 
sum(maxbytes)/1024/1024 b, 
autoextensible
from dba_data_files
where tablespace_name in (select tablespace_name from dba_tablespaces
where retention like '%GUARANTEE' )
group by file_id, tablespace_name, autoextensible
order by tablespace_name
/
set termout on
set trimout off
set trimspool off
ttitle off
set pages 999
set lines 150
set verify off 
set termout off
set trimout on
set trimspool on
REM   
REM ------------------------------------------------------------------------  
REM   
REM  -----------------------------------------------------------------  
REM  
REM
REM  REPORTING UNDO EXTENTS INFORMATION:  
REM   
REM  -----------------------------------------------------------------  
REM 
REM  Undo Extents breakdown information
REM
ttitle center "Rollback Segments Breakdown" skip 2
col status format a20
col cnt format 999,999,999 head "How Many?"
select status, count(*) cnt from dba_rollback_segs
group by status
/
ttitle center "Undo Extents" skip 2
col segment_name format a30 heading "Name"
col "ACT BYTES" format 999,999,999,999 head "Active|Extents"
col "UNEXP BYTES" format 999,999,999,999 head "Unxpired|Extents"
col "EXP BYTES" format 999,999,999,999 head "Expired|Extents"
select segment_name,
nvl(sum(act),0) "ACT BYTES",
nvl(sum(unexp),0) "UNEXP BYTES",
nvl(sum(exp),0) "EXP BYTES"
from (
select segment_name,
nvl(sum(bytes),0) act,00 unexp, 00 exp
from DBA_UNDO_EXTENTS
where status='ACTIVE' group by segment_name
union
select segment_name,
00 act, nvl(sum(bytes),0) unexp, 00 exp
from DBA_UNDO_EXTENTS
where status='UNEXPIRED' group by segment_name
union
select segment_name,
00 act, 00 unexp, nvl(sum(bytes),0) exp
from DBA_UNDO_EXTENTS
where status='EXPIRED' group by segment_name
) group by segment_name;
ttitle center "Undo Extents Statistics" skip 2
col size format 999,999,999,999 heading "Size"
col "HOW MANY" format 999,999,999 heading "How Many?"
col st heading a12 heading "Status"
select distinct status st, count(*) "HOW MANY", sum(bytes) "SIZE"
from dba_undo_extents
group by status
/
col segment_name format a30 heading "Name"
col TABLESPACE_NAME for a20
col BYTES for 999,999,999,999
col BLOCKS for 999,999,999
col status for a15 heading "Status"
col segment_name heading "Segment"
col extent_id heading "ID"
select SEGMENT_NAME, TABLESPACE_NAME, EXTENT_ID, 
FILE_ID, BLOCK_ID, BYTES, BLOCKS, STATUS
from dba_undo_extents
order by 1,3,4,5
/
REM
REM  -----------------------------------------------------------------  
REM 
REM  Undo Extents Contention breakdown
REM  Take out column TUNED_UNDORETENTION if customer 
REM   prior to 10.2.x
REM
REM   The time frame can be adjusted with this query
REM   By default using around 4 hour window of time
REM
REM   Ex.
REM   Using sysdate-.04 looking at the last hour
REM   Using sysdate-.16 looking at the last 4 hours
REM   Using sysdate-.32 looking at the last 8 hours
REM   Using sysdate-1 looking at the last 24 hours
REM
set linesize 140
ttitle center "Undo Extents Error Conditions (Default - Last 4 Hours)" skip 2
col UNXPSTEALCNT format 999,999,999  heading "# Unexpired|Stolen"
col EXPSTEALCNT format 999,999,999   heading "# Expired|Reused"
col SSOLDERRCNT format 999,999,999   heading "ORA-1555|Error"
col NOSPACEERRCNT format 999,999,999 heading "Out-Of-space|Error"
col MAXQUERYLEN format 999,999,999   heading "Max Query|Length"
col TUNED_UNDORETENTION format 999,999,999  heading "Auto-Ajusted|Undo Retention"
col hours format 999,999 heading "Tuned|(HRs)"
select inst_id, to_char(begin_time,'MM/DD/YYYY HH24:MI') begin_time, 
UNXPSTEALCNT, EXPSTEALCNT , SSOLDERRCNT, NOSPACEERRCNT, MAXQUERYLEN,
TUNED_UNDORETENTION, TUNED_UNDORETENTION/60/60 hours
from gv$undostat
where begin_time between (sysdate-.16) 
and sysdate
order by inst_id, begin_time
/
set termout on
set trimout off
set trimspool off
ttitle off
set pages 999
set lines 150
set verify off 
set termout off
set trimout on
set trimspool on
REM   
REM ------------------------------------------------------------------------  
col name format a30  
col gets format 9,999,999  
col waits format 9,999,999  
PROMPT  ROLLBACK HIT STATISTICS:  
REM   
REM  GETS - # of gets on the rollback segment header 
REM  WAITS - # of waits for the rollback segment header  
set head on;  
select name, waits, gets  
from   v$rollstat, v$rollname  
where  v$rollstat.usn = v$rollname.usn  
/  
col pct head "< 2% ideal"
select 'The average of waits/gets is '||  
round((sum(waits) / sum(gets)) * 100,2)||'%' PCT 
From    v$rollstat  
/  
PROMPT  REDO CONTENTION STATISTICS:
REM   
REM  If the ratio of waits to gets is more than 1% or 2%, consider  
REM  creating more rollback segments  
REM   
REM  Another way to gauge rollback contention is:  
REM   
column xn1 format 9999999  
column xv1 new_value xxv1 noprint  
select class, count  
from   v$waitstat  
where  class in ('system undo header', 'system undo block', 
'undo header',        'undo block'          )  
/  
set head off
select 'Total requests = '||sum(count) xn1, sum(count) xv1  
from    v$waitstat  
/  
select 'Contention for system undo header = '||  
(round(count/(&xxv1+0.00000000001),4)) * 100||'%'  
from  v$waitstat  
where   class = 'system undo header'  
/  
select 'Contention for system undo block = '||  
(round(count/(&xxv1+0.00000000001),4)) * 100||'%'  
from    v$waitstat  
where   class = 'system undo block'  
/  
select 'Contention for undo header = '||  
(round(count/(&xxv1+0.00000000001),4)) * 100||'%'  
from    v$waitstat  
where   class = 'undo header'  
/  
select 'Contention for undo block = '||  
(round(count/(&xxv1+0.00000000001),4)) * 100||'%'  
from    v$waitstat  
where   class = 'undo block'  
/  
REM   
REM  NOTE: Not as useful with AUM configured 
REM 
REM  If the percentage for an area is more than 1% or 2%, consider  
REM  creating more rollback segments.  Note:  This value is usually very  
REM  small 
REM  and has been rounded to 4 places.  
REM   
REM ------------------------------------------------------------------------  
REM   
REM  The following shows how often user processes had to wait for space in  
REM  the redo log buffer:  
select name||' = '||value  
from   v$sysstat  
where  name = 'redo log space requests'  
/  
REM   
REM  This value should be near 0.  If this value increments consistently,  
REM  processes have had to wait for space in the redo buffer.  If this  
REM  condition exists over time, increase the size of LOG_BUFFER in the  
REM  init.ora file in increments of 5% until the value nears 0.  
REM  ** NOTE: increasing the LOG_BUFFER value will increase total SGA size.  
REM   
REM  -----------------------------------------------------------------------  
col name format a15  
col gets format 9999999  
col misses format 9999999  
col immediate_gets heading 'IMMED GETS' format 9999999  
col immediate_misses heading 'IMMED MISS' format 9999999  
col sleeps format 999999  
PROMPT  LATCH CONTENTION:  
REM   
REM  GETS - # of successful willing-to-wait requests for a latch  
REM  MISSES - # of times an initial willing-to-wait request was unsuccessful  
REM  IMMEDIATE_GETS - # of successful immediate requests for each latch  
REM  IMMEDIATE_MISSES = # of unsuccessful immediate requests for each latch  
REM  SLEEPS - # of times a process waited and requests a latch after an  
REM           initial willing-to-wait request  
REM   
REM  If the latch requested with a willing-to-wait request is not  
REM  available, the requesting process waits a short time and requests  
REM  again.  
REM  If the latch requested with an immediate request is not available,  
REM  the requesting process does not wait, but continues processing  
REM   
set head on  
select name,          gets,              misses,  
immediate_gets,  immediate_misses,  sleeps  
from   v$latch  
where  name in ('redo allocation',  'redo copy')  
/  
set head off 
select 'Ratio of MISSES to GETS: '||  
round((sum(misses)/(sum(gets)+0.00000000001) * 100),2)||'%'  
from    v$latch  
where   name in ('redo allocation',  'redo copy')  
/  
select 'Ratio of IMMEDIATE_MISSES to IMMEDIATE_GETS: '||  
round((sum(immediate_misses)/  
(sum(immediate_misses+immediate_gets)+0.00000000001) * 100),2)||'%' 
from    v$latch  
where   name in ('redo allocation',  'redo copy')  
/  
set head on
REM   
REM  If either ratio exceeds 1%, performance will be affected.  
REM   
REM  Decreasing the size of LOG_SMALL_ENTRY_MAX_SIZE reduces the number of  
REM  processes copying information on the redo allocation latch.  
REM   
REM  Increasing the size of LOG_SIMULTANEOUS_COPIES will reduce contention  
REM  for redo copy latches.  
REM   
REM  -----------------------------------------------------------------  
REM  This looks at overall i/o activity against individual  
REM  files within a tablespace  
REM   
REM  Look for a mismatch across disk drives in terms of I/O  
REM   
REM  Also, examine the Blocks per Read Ratio for heavily accessed  
REM  TSs - if this value is significantly above 1 then you may have  
REM  full tablescans occurring (with multi-block I/O)  
REM   
REM  If activity on the files is unbalanced, move files around to balance  
REM  the load.  Should see an approximately even set of numbers across files  
REM   
set space 1  
PROMPT  REPORTING I/O STATISTICS:
column pbr       format 99999999  heading 'Physical|Blk Read'  
column pbw       format 999999    heading 'Physical|Blks Wrtn'  
column pyr       format 999999    heading 'Physical|Reads'  
column readtim   format 99999999  heading 'Read|Time'  
column name      format a55       heading 'DataFile Name'  
column writetim  format 99999999  heading 'Write|Time'  
compute sum of f.phyblkrd, f.phyblkwrt on report  
select fs.name name,  f.phyblkrd pbr,  f.phyblkwrt pbw, 
f.readtim,     f.writetim  
from   v$filestat f, v$datafile fs  
where  f.file#  =  fs.file#  
order  by fs.name  
/  
REM   
REM  -----------------------------------------------------------------  
PROMPT  GENERATING WAIT STATISTICS:  
REM   
REM  This will show wait stats for certain kernel instances.  This  
REM  may show the need for additional rbs, wait lists, db_buffers  
REM   
column class  heading 'Class Type'  
column count  heading 'Times Waited'  format 99,999,999 
column time   heading 'Total Times'   format 99,999,999  
select class,  count,  time  
from   v$waitstat  
where  count > 0  
order  by class  
/  
REM   
REM  Look at the wait statistics generated above (if any). They will  
REM  tell you where there is contention in the system.  There will  
REM  usually be some contention in any system - but if the ratio of  
REM  waits for a particular operation starts to rise, you may need to  
REM  add additional resource, such as more database buffers, log buffers,  
REM  or rollback segments  
REM   
REM  -----------------------------------------------------------------  
PROMPT  ROLLBACK EXTENT STATISTICS:  
REM   
column usn        format 999          heading 'Undo #'
column extents    format 999          heading 'Extents'  
column rssize     format 999,999,999  heading 'Size in|Bytes'  
column optsize    format 999,999,999  heading 'Optimal|Size'  
column hwmsize    format 99,999,999   heading 'High Water|Mark'  
column shrinks    format 9,999        heading 'Num of|Shrinks'  
column wraps      format 9,999        heading 'Num of|Wraps'  
column extends    format 999,999      heading 'Num of|Extends'  
column aveactive  format 999,999,999  heading 'Average size|Active Extents'  
column rownum noprint  
select usn, extents, rssize,    optsize,  hwmsize,  
shrinks,   wraps,    extends,  aveactive  
from   v$rollstat  
order  by rownum  
/  
set termout on
set trimout off
set trimspool off
set lines 120
set pages 999
set termout off
set trimout on
set trimspool on
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
prompt 
prompt  ############## HISTORICAL DATA ############## 
prompt 
col x format 999,999 head "Max Concurrent|Last 7 Days"
col y format 999,999 head "Max Concurrent|Since Startup"
select max(maxconcurrency) x from v$undostat
/
select max(maxconcurrency) y from sys.wrh$_undostat
/
col i format 999,999 head "1555 Errors"
col j format 999,999 head "Undo Space Errors"
select sum(ssolderrcnt) i from v$undostat
where end_time > sysdate-2
/
select sum(nospaceerrcnt) j from v$undostat
where end_time > sysdate-2
/
prompt 
prompt  ############## CURRENT STATUS OF SEGMENTS  ############## 
prompt  ##############   SNAPSHOT IN TIME INFO     ##############
prompt  ##############(SHOWS CURRENT UNDO ACTIVITY)##############
prompt 
col segment_name format a30 head "Segment Name"
col "ACT BYTES" format 999,999,999,999 head "Active Bytes"
col "UNEXP BYTES" format 999,999,999,999 head "Unexpired Bytes"
col "EXP BYTES" format 999,999,999,999 head "Expired Bytes"
select segment_name, nvl(sum(act),0) "ACT BYTES", 
nvl(sum(unexp),0) "UNEXP BYTES",
nvl(sum(exp),0) "EXP BYTES"
from (select segment_name, nvl(sum(bytes),0) act,00 unexp, 00 exp
from dba_undo_extents where status='ACTIVE' group by segment_name
union 
select segment_name, 00 act, nvl(sum(bytes),0) unexp, 00 exp
from dba_undo_extents where status='UNEXPIRED' group by segment_name
union
select segment_name, 00 act, 00 unexp, nvl(sum(bytes),0) exp
from dba_undo_extents where status='EXPIRED' group by segment_name)
group by segment_name
order by 1
/
prompt 
prompt  ############## UNDO SPACE USAGE ############## 
prompt 
col usn format 999,999 head "Segment#"
col shrinks format 999,999,999 head "Shrinks"
col aveshrink format 999,999,999 head "Avg Shrink Size"
select usn, shrinks, aveshrink from v$rollstat
/
set termout on
set trimout off
set trimspool off
set pages 999
set termout off
set trimout on
set trimspool on
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
col inst_id format 999 head "Instance #"
col Parameter format a35 wrap
col "Session Value" format a25 wrapped
col "Instance Value" format a25 wrapped
prompt
prompt  ############## PARAMETERS ############## 
prompt
select  a.inst_id, a.ksppinm  "Parameter",
b.ksppstvl "Session Value",
c.ksppstvl "Instance Value"
from x$ksppi a, x$ksppcv b, x$ksppsv c
where a.indx = b.indx and a.indx = c.indx
and a.inst_id=b.inst_id and b.inst_id=c.inst_id
and a.ksppinm in ('_undo_autotune', '_smu_debug_mode',
'_highthreshold_undoretention',
'undo_tablespace','undo_retention','undo_management')
order by 2;
set termout on
set trimout off
set trimspool off
set pages 999
set termout off
set trimout on
set trimspool on
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
prompt 
prompt  ############## WAITS FOR UNDO (Since Startup) ############## 
prompt 
col inst_id head "Instance#"
col eq_type format a3 head "Enq"
col total_req# format 999,999,999,999,999,999 head "Total Requests"
col total_wait# format 999,999 head "Total Waits"
col succ_req# format 999,999,999,999,999,999 head "Successes"
col failed_req# format 999,999,999999 head "Failures"
col cum_wait_time format 999,999,999 head "Cummalitve|Time"
select * from v$enqueue_stat where eq_type='US'
union
select * from v$enqueue_stat where eq_type='HW'
/
prompt 
prompt  ############## LOCKS FOR UNDO ############## 
prompt 
col addr head "ADDR"
col KADDR head "KADDR"
col sid head "Session"
col osuser format a10 head "OS User"
col machine format a15 head "Machine"
col program format a17 head "Program"
col process format a7 head "Process"
col lmode head "Lmode"
col request head "Request"
col ctime format 9,999 head "Time|(Mins)"
col block head "Blocking?"
select /*+ RULE */  a.SID, b.process,
b.OSUSER,  b.MACHINE,  b.PROGRAM, 
addr, kaddr, lmode, request, round(ctime/60/60,0) ctime, block 
from 
v$lock a, 
v$session b 
where 
a.sid=b.sid
and a.type='US'
/
prompt 
prompt  ############## TUNED RETENTION HISTORY (Last 2 Days) ############## 
prompt  ##############        LOWEST AND HIGHEST DATA        ############## 
prompt 
col low format 999,999,999,999 head "Undo Retention|Lowest Tuned Value"
col high format 999,999,999,999 head "Undo Retention|Highest Tuned Value"
select end_time, tuned_undoretention from v$undostat where tuned_undoretention = (
select min(tuned_undoretention) low
from v$undostat
where end_time > sysdate-2)
/
select end_time, tuned_undoretention from v$undostat where tuned_undoretention = (
select max(tuned_undoretention) high
from v$undostat
where end_time > sysdate-2)
/
prompt 
prompt  ############## CURRENT TRANSACTIONS ############## 
prompt 
col sql_text format a40 word_wrapped head "SQL Code"
select a.start_date, a.start_scn, a.status, c.sql_text
from v$transaction a, v$session b, v$sqlarea c
where b.saddr=a.ses_addr and c.address=b.sql_address
and b.sql_hash_value=c.hash_value
/
select current_scn from v$database
/
col a format 999,999 head "UnexStolen"
col b format 999,999 head "ExStolen"
col c format 999,999 head "UnexReuse"
col d format 999,999 head "ExReuse"
prompt 
prompt  ############## WHO'S STEALING WHAT? (Last 2 Days) ############## 
prompt 
select unxpstealcnt a, expstealcnt b,
unxpblkreucnt c, expblkreucnt d
from v$undostat
where (unxpstealcnt > 0 or expstealcnt > 0)
and end_time > sysdate-2
/
set termout on
set trimout off
set trimspool off
set pages 999
set termout off
set trimout on
set trimspool on
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
col current_scn head "SCN Now"
col start_date head "Trans Started"
col start_scn head "SCN for Trans"
col ses_addr head "ADDR"
prompt 
prompt  ############## Historical V$UNDOSTAT (Last 2 Days) ############## 
prompt 
col end_time format a18 Head "Date/Time"
col maxq format 999,999 head "Query|Maximum|Minutes"
col maxquerysqlid head "SqlID"
col undotsn format 999,999 head "TBS"
col undoblks format 999,999,999 head "Undo|Blocks"
col txncount format 999,999,999 head "# of|Trans"
col unexpiredblks format 999,999,999 head "# of Unexpired"
col expiredblks format 999,999,999 head "# of Expired"
col tuned format 999,999 head "Tuned Retention|(Minutes)"
select end_time, round(maxquerylen/60,0) maxq, maxquerysqlid,
undotsn, undoblks, txncount, unexpiredblks, expiredblks, 
round(tuned_undoretention/60,0) Tuned
from dba_hist_undostat
where end_time > sysdate-2
order by 1
/
prompt 
prompt  ############## RECENT MISSES FOR UNDO (Last 2 Days) ############## 
prompt 
set lines 500
select * from v$undostat where maxquerylen > tuned_undoretention
and end_time > sysdate-2
order by 2
/
select * from sys.wrh$_undostat where maxquerylen > tuned_undoretention
and end_time > sysdate-2
order by 2
/
prompt 
prompt  ############## AUTO-TUNING TUNE-DOWN DATA    ############## 
prompt  ############## ROLLBACK DATA (Since Startup) ############## 
prompt 
col name format a60 head "Name"
col value format 999,999,999 head "Counters"
select name, value from v$sysstat
where name like '%down retention%' or name like 'une down%'
or name like '%undo segment%' or name like '%rollback%'
or name like '%undo record%'
/
prompt 
prompt  ############## Long Running Query History ############## 
prompt 
col end_time head "Date"
col maxquerysqlid head "SQL ID"
col runawayquerysqlid format a15 head "Runaway SQL ID"
col results format a35 word_wrapped head "Space Issues"
col status head "Status"
col newret head "Tuned Down|Retention"
select end_time, maxquerysqlid, runawayquerysqlid, status,
decode(status,1,'Slot Active',4,'Reached Best Retention',5,'Reached Best Retention',
8, 'Runaway Query',9,'Runaway Query-Active',10,'Space Pressure',
11,'Space Pressure Currently',
16, 'Tuned Down (to undo_retention) due to Space Pressure', 
17,'Tuned Down (to undo_retention) due to Space Pressure-Active',
18, 'Tuning Down due to Runaway', 19, 'Tuning Down due to Runaway-Active',
28, 'Runaway tuned down to last tune down value',
29, 'Runaway tuned down to last tune down value',
32, 'Max Tuned Down - Not Auto-Tuning',
33, 'Max Tuned Down - Not Auto-Tuning (Active)',
37, 'Max Tuned Down - Not Auto-Tuning (Active)', 
38, 'Max Tuned Down - Not Auto-Tuning', 
39, 'Max Tuned Down - Not Auto-Tuning (Active)', 
40, 'Max Tuned Down - Not Auto-Tuning', 
41, 'Max Tuned Down - Not Auto-Tuning (Active)', 
42, 'Max Tuned Down - Not Auto-Tuning', 
44, 'Max Tuned Down - Not Auto-Tuning', 
45, 'Max Tuned Down - Not Auto-Tuning (Active)', 
'Other ('||status||')') Results, spcprs_retention NewRet
from sys.wrh$_undostat
where status > 1
/
prompt 
prompt  ############## Details on Long Run Queries ############## 
prompt 
col sql_fulltext head "SQL Text"
Col sql_id heading "SQL ID"
select sql_id, sql_fulltext, last_load_time "Last Load", 
round(elapsed_time/60/60/24,0) "Elapsed Days" 
from v$sql where sql_id in 
(select maxquerysqlid from sys.wrh$_undostat 
where status > 1)
/
set termout on
set trimout off
set trimspool off
set pages 999
set termout off
set trimout on
set trimspool on
prompt
prompt  ############## RUNTIME ############## 
prompt
col rdate head "Run Time"
select sysdate rdate from dual;
prompt 
prompt  ############## IN USE Undo Data ############## 
prompt 
select 
((select (nvl(sum(bytes),0)) 
from dba_undo_extents 
where tablespace_name in (select tablespace_name from dba_tablespaces
where retention like '%GUARANTEE' )
and status in ('ACTIVE','UNEXPIRED')) *100) / 
(select sum(bytes) 
from dba_data_files 
where tablespace_name in (select tablespace_name from dba_tablespaces
where retention like '%GUARANTEE' )) "PCT_INUSE" 
from dual; 
select tablespace_name, extent_management, allocation_type,
segment_space_management, retention
from dba_tablespaces where retention like '%GUARANTEE'
/
col c format 999,999,999,999 head "Sum of Free"
select (nvl(sum(bytes),0)) c from dba_free_space
where tablespace_name in
(select tablespace_name from dba_tablespaces where retention like '%GUARANTEE')
/
col d format 999,999,999,999 head "Total Bytes"
select sum(bytes) d from dba_data_files
where tablespace_name in
(select tablespace_name from dba_tablespaces where retention like '%GUARANTEE')
/
PROMPT
PROMPT  ############## UNDO SEGMENTS ############## 
PROMPT
col status head "Status"
col z format 999,999 head "Total Extents"
break on report
compute sum on report of z
select status, count(*) z from dba_undo_extents
group by status
/
col z format 999,999 head "Undo Segments"
select status, count(*) z from dba_rollback_segs
group by status
/
prompt 
prompt  ############## CURRENT STATUS OF SEGMENTS  ############## 
prompt  ##############   SNAPSHOT IN TIME INFO     ##############
prompt  ##############(SHOWS CURRENT UNDO ACTIVITY)##############
prompt 
col segment_name format a30 head "Segment Name"
col "ACT BYTES" format 999,999,999,999 head "Active Bytes"
col "UNEXP BYTES" format 999,999,999,999 head "Unexpired Bytes"
col "EXP BYTES" format 999,999,999,999 head "Expired Bytes"
select segment_name, nvl(sum(act),0) "ACT BYTES", 
nvl(sum(unexp),0) "UNEXP BYTES",
nvl(sum(exp),0) "EXP BYTES"
from (select segment_name, nvl(sum(bytes),0) act,00 unexp, 00 exp
from dba_undo_extents where status='ACTIVE' group by segment_name
union 
select segment_name, 00 act, nvl(sum(bytes),0) unexp, 00 exp
from dba_undo_extents where status='UNEXPIRED' group by segment_name
union
select segment_name, 00 act, 00 unexp, nvl(sum(bytes),0) exp
from dba_undo_extents where status='EXPIRED' group by segment_name)
group by segment_name
order by 1
/
prompt 
prompt  ############## UNDO SPACE USAGE ############## 
prompt 
col usn format 999,999 head "Segment#"
col shrinks format 999,999,999 head "Shrinks"
col aveshrink format 999,999,999 head "Avg Shrink Size"
select usn, shrinks, aveshrink from v$rollstat
/
set termout on
set trimout off
set trimspool off
spool off

Script to Collect Data Guard Diagnostic Information

Overview
——–

This script is intended to provide an easy method to provide information
necessary to troubleshoot Data Guard issues.

Script Notes
————-

This script is intended to be run via sqlplus as the SYS or Internal user.

Script
——-

- - - - - - - - - - - - - - - - Script begins here - - - - - - - - - - - - - - - -
-- NAME: dg_prim_diag.sql  (Run on PRIMARY with a LOGICAL or PHYSICAL STANDBY)
set echo off
set feedback off
column timecol new_value timestamp
column spool_extension new_value suffix
select to_char(sysdate,'Mondd_hhmi') timecol,
'.out' spool_extension from sys.dual;
column output new_value dbname
select value || '_' output
from v$parameter where name = 'db_name';
spool dg_prim_diag_&&dbname&&timestamp&&suffix
set linesize 79
set pagesize 35
set trim on
set trims on
alter session set nls_date_format = 'MON-DD-YYYY HH24:MI:SS';
set feedback on
select to_char(sysdate) time from dual;
set echo on
-- In the following the database_role should be primary as that is what
-- this script is intended to be run on.  If protection_level is different
-- than protection_mode then for some reason the mode listed in
-- protection_mode experienced a need to downgrade.  Once the error
-- condition has been corrected the protection_level should match the
-- protection_mode after the next log switch.
column role format a7 tru
column name format a10 wrap
select name,database_role role,log_mode,
protection_mode,protection_level
from v$database;
-- ARCHIVER can be (STOPPED | STARTED | FAILED). FAILED means that the
-- archiver failed to archive a log last time, but will try again within 5
-- minutes. LOG_SWITCH_WAIT The ARCHIVE LOG/CLEAR LOG/CHECKPOINT event log
-- switching is waiting for.  Note that if ALTER SYSTEM SWITCH LOGFILE is
-- hung, but there is room in the current online redo log, then value is
-- NULL
column host_name format a20 tru
column version format a9 tru
select instance_name,host_name,version,archiver,log_switch_wait
from v$instance;
-- The following query give us information about catpatch.
-- This way we can tell if the procedure doesn't match the image.
select version, modified, status from dba_registry
where comp_id = 'CATPROC';
-- Force logging is not mandatory but is recommended.  Supplemental
-- logging must be enabled if the standby associated with this primary is
-- a logical standby. During normal operations it is acceptable for
-- SWITCHOVER_STATUS to be SESSIONS ACTIVE or TO STANDBY.
column force_logging format a13 tru
column remote_archive format a14 tru
column dataguard_broker format a16 tru
select force_logging,remote_archive,
supplemental_log_data_pk,supplemental_log_data_ui,
switchover_status,dataguard_broker
from v$database;
-- This query produces a list of all archive destinations.  It shows if
-- they are enabled, what process is servicing that destination, if the
-- destination is local or remote, and if remote what the current mount ID
-- is.
column destination format a35 wrap
column process format a7
column archiver format a8
column ID format 99
column mid format 99
select dest_id "ID",destination,status,target,
schedule,process,mountid  mid
from v$archive_dest order by dest_id;
-- This select will give further detail on the destinations as to what
-- options have been set.  Register indicates whether or not the archived
-- redo log is registered in the remote destination control file.
set numwidth 8
column ID format 99
select dest_id "ID",archiver,transmit_mode,affirm,async_blocks async,
net_timeout net_time,delay_mins delay,reopen_secs reopen,
register,binding
from v$archive_dest order by dest_id;
-- The following select will show any errors that occured the last time
-- an attempt to archive to the destination was attempted.  If ERROR is
-- blank and status is VALID then the archive completed correctly.
column error format a55 wrap
select dest_id,status,error from v$archive_dest;
-- The query below will determine if any error conditions have been
-- reached by querying the v$dataguard_status view (view only available in
-- 9.2.0 and above):
column message format a80
select message, timestamp
from v$dataguard_status
where severity in ('Error','Fatal')
order by timestamp;
-- The following query will determine the current sequence number
-- and the last sequence archived.  If you are remotely archiving
-- using the LGWR process then the archived sequence should be one
-- higher than the current sequence.  If remotely archiving using the
-- ARCH process then the archived sequence should be equal to the
-- current sequence.  The applied sequence information is updated at
-- log switch time.
select ads.dest_id,max(sequence#) "Current Sequence",
max(log_sequence) "Last Archived"
from v$archived_log al, v$archive_dest ad, v$archive_dest_status ads
where ad.dest_id=al.dest_id
and al.dest_id=ads.dest_id
group by ads.dest_id;
-- The following select will attempt to gather as much information as
-- possible from the standby.  SRLs are not supported with Logical Standby
-- until Version 10.1.
set numwidth 8
column ID format 99
column "SRLs" format 99
column Active format 99
select dest_id id,database_mode db_mode,recovery_mode,
protection_mode,standby_logfile_count "SRLs",
standby_logfile_active ACTIVE,
archived_seq#
from v$archive_dest_status;
-- Query v$managed_standby to see the status of processes involved in
-- the shipping redo on this system.  Does not include processes needed to
-- apply redo.
select process,status,client_process,sequence#
from v$managed_standby;
-- The following query is run on the primary to see if SRL's have been
-- created in preparation for switchover.
select group#,sequence#,bytes from v$standby_log;
-- The above SRL's should match in number and in size with the ORL's
-- returned below:
select group#,thread#,sequence#,bytes,archived,status from v$log;
-- Non-default init parameters.
set numwidth 5
column name format a30 tru
column value format a48 wra
select name, value
from v$parameter
where isdefault = 'FALSE';
spool off
- - - - - - - - - - - - - - - -  Script ends here  - - - - - - - - - - - - - - - -

Overview
——–

This script is intended to provide an easy method to provide information
necessary to troubleshoot Data Guard issues.

Script Notes
————-

This script is intended to be run via sqlplus as the SYS or Internal user.

Script
——-

- - - - - - - - - - - - - - - - Script begins here - - - - - - - - - - - - - - - -
-- NAME: DG_phy_stby_diag.sql
set echo off
set feedback off
column timecol new_value timestamp
column spool_extension new_value suffix
select to_char(sysdate,'Mondd_hhmi') timecol,
'.out' spool_extension from sys.dual;
column output new_value dbname
select value || '_' output
from v$parameter where name = 'db_name';
spool dgdiag_phystby_&&dbname&&timestamp&&suffix
set lines 200
set pagesize 35
set trim on
set trims on
alter session set nls_date_format = 'MON-DD-YYYY HH24:MI:SS';
set feedback on
select to_char(sysdate) time from dual;
set echo on
--
-- ARCHIVER can be  (STOPPED | STARTED | FAILED) FAILED means that the archiver failed
-- to archive a -- log last time, but will try again within 5 minutes. LOG_SWITCH_WAIT
-- The ARCHIVE LOG/CLEAR LOG/CHECKPOINT event log switching is waiting for. Note that
-- if ALTER SYSTEM SWITCH LOGFILE is hung, but there is room in the current online
-- redo log, then value is NULL
column host_name format a20 tru
column version format a9 tru
select instance_name,host_name,version,archiver,log_switch_wait from v$instance;
-- The following select will give us the generic information about how this standby is
-- setup.  The database_role should be standby as that is what this script is intended
-- to be ran on.  If protection_level is different than protection_mode then for some
-- reason the mode listed in protection_mode experienced a need to downgrade.  Once the
-- error condition has been corrected the protection_level should match the protection_mode
-- after the next log switch.
column ROLE format a7 tru
select name,database_role,log_mode,controlfile_type,protection_mode,protection_level
from v$database;
-- Force logging is not mandatory but is recommended.  Supplemental logging should be enabled
-- on the standby if a logical standby is in the configuration. During normal
-- operations it is acceptable for SWITCHOVER_STATUS to be SESSIONS ACTIVE or NOT ALLOWED.
column force_logging format a13 tru
column remote_archive format a14 tru
column dataguard_broker format a16 tru
select force_logging,remote_archive,supplemental_log_data_pk,supplemental_log_data_ui,
switchover_status,dataguard_broker from v$database;
-- This query produces a list of all archive destinations and shows if they are enabled,
-- what process is servicing that destination, if the destination is local or remote,
-- and if remote what the current mount ID is. For a physical standby we should have at
-- least one remote destination that points the primary set but it should be deferred.
COLUMN destination FORMAT A35 WRAP
column process format a7
column archiver format a8
column ID format 99
select dest_id "ID",destination,status,target,
archiver,schedule,process,mountid
from v$archive_dest;
-- If the protection mode of the standby is set to anything higher than max performance
-- then we need to make sure the remote destination that points to the primary is set
-- with the correct options else we will have issues during switchover.
select dest_id,process,transmit_mode,async_blocks,
net_timeout,delay_mins,reopen_secs,register,binding
from v$archive_dest;
-- The following select will show any errors that occured the last time an attempt to
-- archive to the destination was attempted.  If ERROR is blank and status is VALID then
-- the archive completed correctly.
column error format a55 tru
select dest_id,status,error from v$archive_dest;
-- Determine if any error conditions have been reached by querying thev$dataguard_status
-- view (view only available in 9.2.0 and above):
column message format a80
select message, timestamp
from v$dataguard_status
where severity in ('Error','Fatal')
order by timestamp;
-- The following query is ran to get the status of the SRL's on the standby.  If the
-- primary is archiving with the LGWR process and SRL's are present (in the correct
-- number and size) then we should see a group# active.
select group#,sequence#,bytes,used,archived,status from v$standby_log;
-- The above SRL's should match in number and in size with the ORL's returned below:
select group#,thread#,sequence#,bytes,archived,status from v$log;
-- Query v$managed_standby to see the status of processes involved in the
-- configuration.
select process,status,client_process,sequence#,block#,active_agents,known_agents
from v$managed_standby;
-- Verify that the last sequence# received and the last sequence# applied to standby
-- database.
select al.thrd "Thread", almax "Last Seq Received", lhmax "Last Seq Applied"
from (select thread# thrd, max(sequence#) almax
from v$archived_log
where resetlogs_change#=(select resetlogs_change# from v$database)
group by thread#) al,
(select thread# thrd, max(sequence#) lhmax
from v$log_history
where first_time=(select max(first_time) from v$log_history)
group by thread#) lh
where al.thrd = lh.thrd;
-- The V$ARCHIVE_GAP fixed view on a physical standby database only returns the next
-- gap that is currently blocking redo apply from continuing. After resolving the
-- identified gap and starting redo apply, query the V$ARCHIVE_GAP fixed view again
-- on the physical standby database to determine the next gap sequence, if there is
-- one.
select * from v$archive_gap;
-- Non-default init parameters.
set numwidth 5
column name format a30 tru
column value format a50 wra
select name, value
from v$parameter
where isdefault = 'FALSE';
spool off
- - - - - - - - - - - - - - - -  Script ends here  - - - - - - - - - - - - - - - -

Overview
——–

This script is intended to provide an easy method to provide information
necessary to troubleshoot Data Guard issues.

Script Notes
————-

This script is intended to be run via sqlplus as the SYS or Internal user.

Script
——-

- - - - - - - - - - - - - - - - Script begins here - - - - - - - - - - - - - - - -
-- NAME: dg_lsby_diag.sql  (Run on LOGICAL STANDBY)
set echo off
set feedback off
column timecol new_value timestamp
column spool_extension new_value suffix
select to_char(sysdate,'Mondd_hhmi') timecol,
'.out' spool_extension from sys.dual;
column output new_value dbname
select value || '_' output
from v$parameter where name = 'db_name';
spool dg_lsby_diag_&&dbname&&timestamp&&suffix
set linesize 79
set pagesize 180
set long 1000
set trim on
set trims on
alter session set nls_date_format = 'MM/DD HH24:MI:SS';
set feedback on
select to_char(sysdate) time from dual;
set echo on
-- The following select will give us the generic information about how
-- this standby is setup.  The database_role should be logical standby as
-- that is what this script is intended to be ran on.
column ROLE format a7 tru
column NAME format a8 wrap
select name,database_role,log_mode,protection_mode
from v$database;
-- ARCHIVER can be (STOPPED | STARTED | FAILED). FAILED means that the
-- archiver failed to archive a log last time, but will try again within 5
-- minutes. LOG_SWITCH_WAIT The ARCHIVE LOG/CLEAR LOG/CHECKPOINT event log
-- switching is waiting for. Note that if ALTER SYSTEM SWITCH LOGFILE is
-- hung, but there is room in the current online redo log, then value is
-- NULL
column host_name format a20 tru
column version format a9 tru
select instance_name,host_name,version,archiver,log_switch_wait
from v$instance;
-- The following query give us information about catpatch.
-- This way we can tell if the procedure doesn't match the image.
select version, modified, status from dba_registry
where comp_id = 'CATPROC';
-- Force logging and supplemental logging are not mandatory but are
-- recommended if you plan to switchover.  During normal operations it is
-- acceptable for SWITCHOVER_STATUS to be SESSIONS ACTIVE or NOT ALLOWED.
column force_logging format a13 tru
column remote_archive format a14 tru
column dataguard_broker format a16 tru
select force_logging,remote_archive,supplemental_log_data_pk,
supplemental_log_data_ui,switchover_status,dataguard_broker
from v$database;
-- This query produces a list of all archive destinations.  It shows if
-- they are enabled, what process is servicing that destination, if the
-- destination is local or remote, and if remote what the current mount ID
-- is.
column destination format a35 wrap
column process format a7
column archiver format a8
column ID format 99
column mid format 99
select dest_id "ID",destination,status,target,
schedule,process,mountid  mid
from v$archive_dest order by dest_id;
-- This select will give further detail on the destinations as to what
-- options have been set.  Register indicates whether or not the archived
-- redo log is registered in the remote destination control file.
set numwidth 8
column ID format 99
select dest_id "ID",archiver,transmit_mode,affirm,async_blocks async,
net_timeout net_time,delay_mins delay,reopen_secs reopen,
register,binding
from v$archive_dest order by dest_id;
-- Determine if any error conditions have been reached by querying the
-- v$dataguard_status view (view only available in 9.2.0 and above):
column message format a80
select message, timestamp
from v$dataguard_status
where severity in ('Error','Fatal')
order by timestamp;
-- Query v$managed_standby to see the status of processes involved in
-- the shipping redo on this system.  Does not include processes needed to
-- apply redo.
select process,status,client_process,sequence#
from v$managed_standby;
-- Verify that log apply services on the standby are currently
-- running. If the query against V$LOGSTDBY returns no rows then logical
-- apply is not running.
column status format a50 wrap
column type format a11
set numwidth 15
SELECT TYPE, STATUS, HIGH_SCN
FROM V$LOGSTDBY;
-- The DBA_LOGSTDBY_PROGRESS view describes the progress of SQL apply
-- operations on the logical standby databases.  The APPLIED_SCN indicates
-- that committed transactions at or below that SCN have been applied. The
-- NEWEST_SCN is the maximum SCN to which data could be applied if no more
-- logs were received. This is usually the MAX(NEXT_CHANGE#)-1 from
-- DBA_LOGSTDBY_LOG.  When the value of NEWEST_SCN and APPLIED_SCN are the
-- equal then all available changes have been applied.  If your
-- APPLIED_SCN is below NEWEST_SCN and is increasing then SQL apply is
-- currently processing changes.
set numwidth 15
select
(case
when newest_scn = applied_scn then 'Done'
when newest_scn <= applied_scn + 9 then 'Done?'
when newest_scn > (select max(next_change#) from dba_logstdby_log)
then 'Near done'
when (select count(*) from dba_logstdby_log
where (next_change#, thread#) not in
(select first_change#, thread# from dba_logstdby_log)) > 1
then 'Gap'
when newest_scn > applied_scn then 'Not Done'
else '---' end) "Fin?",
newest_scn, applied_scn, read_scn from dba_logstdby_progress;
select newest_time, applied_time, read_time from dba_logstdby_progress;
-- Determine if apply is lagging behind and by how much.  Missing
-- sequence#'s in a range indicate that a gap exists.
set numwidth 15
column trd format 99
select thread# trd, sequence#,
first_change#, next_change#,
dict_begin beg, dict_end end,
to_char(timestamp, 'hh:mi:ss') timestamp,
(case when l.next_change# < p.read_scn then 'YES'
when l.first_change# < p.applied_scn then 'CURRENT'
else 'NO' end) applied
from dba_logstdby_log l, dba_logstdby_progress p
order by thread#, first_change#;
-- Get a history on logical standby apply activity.
set numwidth 15
select to_char(event_time, 'MM/DD HH24:MI:SS') time,
commit_scn, current_scn, event, status
from dba_logstdby_events
order by event_time, commit_scn, current_scn;
-- Dump logical standby stats
column name format a40
column value format a20
select * from v$logstdby_stats;
-- Dump logical standby parameters
column name format a33 wrap
column value format a33 wrap
column type format 99
select name, value, type from system.logstdby$parameters
order by type, name;
-- Gather log miner session and dictionary information.
set numwidth 15
select * from system.logmnr_session$;
select * from system.logmnr_dictionary$;
select * from system.logmnr_dictstate$;
select * from v$logmnr_session;
-- Query the log miner dictionary for key tables necessary to process
-- changes for logical standby Label security will move AUD$ from SYS to
-- SYSTEM.  A synonym will remain in SYS but Logical Standby does not
-- support this.
set numwidth 5
column name format a9 wrap
column owner format a6 wrap
select o.logmnr_uid, o.obj#, o.objv#, u.name owner, o.name
from system.logmnr_obj$ o, system.logmnr_user$ u
where
o.logmnr_uid = u.logmnr_uid and
o.owner# = u.user# and
o.name in ('JOB$','JOBSEQ','SEQ$','AUD$',
'FGA_LOG$','IND$','COL$','LOGSTDBY$PARAMETER')
order by u.name;
-- Non-default init parameters.
column name format a30 tru
column value format a48 wra
select name, value
from v$parameter
where isdefault = 'FALSE';
spool off
- - - - - - - - - - - - - - - -  Script ends here  - - - - - - - - - - - - - - - -

沪ICP备14014813号

沪公网安备 31010802001379号