Script:数据库最近的性能度量

以下SQL脚本可以用于列出数据库最近1分钟的性能度量信息(performance metric):

set linesize 80 pagesize 1400
SELECT /*+ ORDERED USE_MERGE(m) */
TO_CHAR(FROM_TZ(CAST(m.end_time AS TIMESTAMP),
TO_CHAR(systimestamp, 'tzr')) AT TIME ZONE sessiontimezone,
'YYYY-MM-DD HH24:MI:SS'),
SUM(CASE
WHEN a.internal_metric_name = 'transactions_ps' THEN
m.value
ELSE
0
END) transactions_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physreads_ps' THEN
m.value
ELSE
0
END) physreads_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physreads_pt' THEN
m.value
ELSE
0
END) physreads_pt,
SUM(CASE
WHEN a.internal_metric_name = 'physwrites_ps' THEN
m.value
ELSE
0
END) physwrites_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physwrites_pt' THEN
m.value
ELSE
0
END) physwrites_pt,
SUM(CASE
WHEN a.internal_metric_name = 'physreadsdir_ps' THEN
m.value
ELSE
0
END) physreadsdir_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physreadsdir_pt' THEN
m.value
ELSE
0
END) physreadsdir_pt,
SUM(CASE
WHEN a.internal_metric_name = 'physwritesdir_ps' THEN
m.value
ELSE
0
END) physwritesdir_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physwritesdir_pt' THEN
m.value
ELSE
0
END) physwritesdir_pt,
SUM(CASE
WHEN a.internal_metric_name = 'physreadslob_ps' THEN
m.value
ELSE
0
END) physreadslob_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physreadslob_pt' THEN
m.value
ELSE
0
END) physreadslob_pt,
SUM(CASE
WHEN a.internal_metric_name = 'physwriteslob_ps' THEN
m.value
ELSE
0
END) physwriteslob_ps,
SUM(CASE
WHEN a.internal_metric_name = 'physwriteslob_pt' THEN
m.value
ELSE
0
END) physwriteslob_pt,
SUM(CASE
WHEN a.internal_metric_name = 'redosize_ps' THEN
m.value
ELSE
0
END) redosize_ps,
SUM(CASE
WHEN a.internal_metric_name = 'logons_ps' THEN
m.value
ELSE
0
END) logons_ps,
SUM(CASE
WHEN a.internal_metric_name = 'logons_pt' THEN
m.value
ELSE
0
END) logons_pt,
SUM(CASE
WHEN a.internal_metric_name = 'opncurs_ps' THEN
m.value
ELSE
0
END) opncurs_ps,
SUM(CASE
WHEN a.internal_metric_name = 'opncurs_pt' THEN
m.value
ELSE
0
END) opncurs_pt,
SUM(CASE
WHEN a.internal_metric_name = 'commits_ps' THEN
m.value
ELSE
0
END) commits_ps,
SUM(CASE
WHEN a.internal_metric_name = 'commits_pt' THEN
m.value
ELSE
0
END) commits_pt,
SUM(CASE
WHEN a.internal_metric_name = 'rollbacks_ps' THEN
m.value
ELSE
0
END) rollbacks_ps,
SUM(CASE
WHEN a.internal_metric_name = 'rollbacks_pt' THEN
m.value
ELSE
0
END) rollbacks_pt,
SUM(CASE
WHEN a.internal_metric_name = 'usercalls_ps' THEN
m.value
ELSE
0
END) usercalls_ps,
SUM(CASE
WHEN a.internal_metric_name = 'usercalls_pt' THEN
m.value
ELSE
0
END) usercalls_pt,
SUM(CASE
WHEN a.internal_metric_name = 'recurscalls_ps' THEN
m.value
ELSE
0
END) recurscalls_ps,
SUM(CASE
WHEN a.internal_metric_name = 'recurscalls_pt' THEN
m.value
ELSE
0
END) recurscalls_pt,
SUM(CASE
WHEN a.internal_metric_name = 'logreads_ps' THEN
m.value
ELSE
0
END) logreads_ps,
SUM(CASE
WHEN a.internal_metric_name = 'logreads_pt' THEN
m.value
ELSE
0
END) logreads_pt,
SUM(CASE
WHEN a.internal_metric_name = 'dbwrcheckpoints_ps' THEN
m.value
ELSE
0
END) dbwrcheckpoints_ps,
SUM(CASE
WHEN a.internal_metric_name = 'bgcheckpoints_ps' THEN
m.value
ELSE
0
END) bgcheckpoints_ps,
SUM(CASE
WHEN a.internal_metric_name = 'redowrites_ps' THEN
m.value
ELSE
0
END) redowrites_ps,
SUM(CASE
WHEN a.internal_metric_name = 'redowrites_pt' THEN
m.value
ELSE
0
END) redowrites_pt,
SUM(CASE
WHEN a.internal_metric_name = 'tabscanslong_ps' THEN
m.value
ELSE
0
END) tabscanslong_ps,
SUM(CASE
WHEN a.internal_metric_name = 'tabscanslong_pt' THEN
m.value
ELSE
0
END) tabscanslong_pt,
SUM(CASE
WHEN a.internal_metric_name = 'tabscanstotal_ps' THEN
m.value
ELSE
0
END) tabscanstotal_ps,
SUM(CASE
WHEN a.internal_metric_name = 'tabscanstotal_pt' THEN
m.value
ELSE
0
END) tabscanstotal_pt,
SUM(CASE
WHEN a.internal_metric_name = 'indxscansfull_pt' THEN
m.value
ELSE
0
END) indxscansfull_pt,
SUM(CASE
WHEN a.internal_metric_name = 'indxscansfull_ps' THEN
m.value
ELSE
0
END) indxscansfull_ps,
SUM(CASE
WHEN a.internal_metric_name = 'indxscanstotal_ps' THEN
m.value
ELSE
0
END) indxscanstotal_ps,
SUM(CASE
WHEN a.internal_metric_name = 'indxscanstotal_pt' THEN
m.value
ELSE
0
END) indxscanstotal_pt,
SUM(CASE
WHEN a.internal_metric_name = 'parses_ps' THEN
m.value
ELSE
0
END) parses_ps,
SUM(CASE
WHEN a.internal_metric_name = 'parses_pt' THEN
m.value
ELSE
0
END) parses_pt,
SUM(CASE
WHEN a.internal_metric_name = 'hardparses_ps' THEN
m.value
ELSE
0
END) hardparses_ps,
SUM(CASE
WHEN a.internal_metric_name = 'hardparses_pt' THEN
m.value
ELSE
0
END) hardparses_pt,
SUM(CASE
WHEN a.internal_metric_name = 'failedparses_ps' THEN
m.value
ELSE
0
END) failedparses_ps,
SUM(CASE
WHEN a.internal_metric_name = 'failedparses_pt' THEN
m.value
ELSE
0
END) failedparses_pt,
SUM(CASE
WHEN a.internal_metric_name = 'executions_ps' THEN
m.value
ELSE
0
END) executions_ps,
SUM(CASE
WHEN a.internal_metric_name = 'sortsdisk_ps' THEN
m.value
ELSE
0
END) sortsdisk_ps,
SUM(CASE
WHEN a.internal_metric_name = 'sortsdisk_pt' THEN
m.value
ELSE
0
END) sortsdisk_pt,
SUM(CASE
WHEN a.internal_metric_name = 'rows_psort' THEN
m.value
ELSE
0
END) rows_psort,
SUM(CASE
WHEN a.internal_metric_name = 'executeswoparse_pct' THEN
m.value
ELSE
0
END) executeswoparse_pct,
SUM(CASE
WHEN a.internal_metric_name = 'softparse_pct' THEN
m.value
ELSE
0
END) softparse_pct,
SUM(CASE
WHEN a.internal_metric_name = 'usercall_pct' THEN
m.value
ELSE
0
END) usercall_pct,
SUM(CASE
WHEN a.internal_metric_name = 'networkbytes_ps' THEN
m.value
ELSE
0
END) networkbytes_ps,
SUM(CASE
WHEN a.internal_metric_name = 'enqtimeouts_ps' THEN
m.value
ELSE
0
END) enqtimeouts_ps,
SUM(CASE
WHEN a.internal_metric_name = 'enqtimeouts_pt' THEN
m.value
ELSE
0
END) enqtimeouts_pt,
SUM(CASE
WHEN a.internal_metric_name = 'enqwaits_ps' THEN
m.value
ELSE
0
END) enqwaits_ps,
SUM(CASE
WHEN a.internal_metric_name = 'enqwaits_pt' THEN
m.value
ELSE
0
END) enqwaits_pt,
SUM(CASE
WHEN a.internal_metric_name = 'enqdeadlocks_ps' THEN
m.value
ELSE
0
END) enqdeadlocks_ps,
SUM(CASE
WHEN a.internal_metric_name = 'enqdeadlocks_pt' THEN
m.value
ELSE
0
END) enqdeadlocks_pt,
SUM(CASE
WHEN a.internal_metric_name = 'enqreqs_ps' THEN
m.value
ELSE
0
END) enqreqs_ps,
SUM(CASE
WHEN a.internal_metric_name = 'enqreqs_pt' THEN
m.value
ELSE
0
END) enqreqs_pt,
SUM(CASE
WHEN a.internal_metric_name = 'dbblkgets_ps' THEN
m.value
ELSE
0
END) dbblkgets_ps,
SUM(CASE
WHEN a.internal_metric_name = 'dbblkgets_pt' THEN
m.value
ELSE
0
END) dbblkgets_pt,
SUM(CASE
WHEN a.internal_metric_name = 'consistentreadgets_ps' THEN
m.value
ELSE
0
END) consistentreadgets_ps,
SUM(CASE
WHEN a.internal_metric_name = 'consistentreadgets_pt' THEN
m.value
ELSE
0
END) consistentreadgets_pt,
SUM(CASE
WHEN a.internal_metric_name = 'dbblkchanges_ps' THEN
m.value
ELSE
0
END) dbblkchanges_ps,
SUM(CASE
WHEN a.internal_metric_name = 'dbblkchanges_pt' THEN
m.value
ELSE
0
END) dbblkchanges_pt,
SUM(CASE
WHEN a.internal_metric_name = 'consistentreadchanges_ps' THEN
m.value
ELSE
0
END) consistentreadchanges_ps,
SUM(CASE
WHEN a.internal_metric_name = 'consistentreadchanges_pt' THEN
m.value
ELSE
0
END) consistentreadchanges_pt,
SUM(CASE
WHEN a.internal_metric_name = 'crblks_ps' THEN
m.value
ELSE
0
END) crblks_ps,
SUM(CASE
WHEN a.internal_metric_name = 'crblks_pt' THEN
m.value
ELSE
0
END) crblks_pt,
SUM(CASE
WHEN a.internal_metric_name = 'crundorecs_pt' THEN
m.value
ELSE
0
END) crundorecs_pt,
SUM(CASE
WHEN a.internal_metric_name = 'userrollbackundorec_ps' THEN
m.value
ELSE
0
END) userrollbackundorec_ps,
SUM(CASE
WHEN a.internal_metric_name = 'userrollbackundorec_pt' THEN
m.value
ELSE
0
END) userrollbackundorec_pt,
SUM(CASE
WHEN a.internal_metric_name = 'leafnodesplits_ps' THEN
m.value
ELSE
0
END) leafnodesplits_ps,
SUM(CASE
WHEN a.internal_metric_name = 'leafnodesplits_pt' THEN
m.value
ELSE
0
END) leafnodesplits_pt,
SUM(CASE
WHEN a.internal_metric_name = 'branchnodesplits_ps' THEN
m.value
ELSE
0
END) branchnodesplits_ps,
SUM(CASE
WHEN a.internal_metric_name = 'branchnodesplits_pt' THEN
m.value
ELSE
0
END) branchnodesplits_pt,
SUM(CASE
WHEN a.internal_metric_name = 'redosize_pt' THEN
m.value
ELSE
0
END) redosize_pt,
SUM(CASE
WHEN a.internal_metric_name = 'crundorecs_ps' THEN
m.value
ELSE
0
END) crundorecs_ps,
SUM(CASE
WHEN a.internal_metric_name = 'dbtime_ps' THEN
m.value
ELSE
0
END) dbtime_ps,
SUM(CASE
WHEN a.internal_metric_name = 'avg_active_sessions' THEN
m.value
ELSE
0
END) avg_active_sessions,
SUM(CASE
WHEN a.internal_metric_name = 'avg_sync_singleblk_read_latency' THEN
m.value
ELSE
0
END) avg_block_read_latency,
SUM(CASE
WHEN a.internal_metric_name = 'iombs_ps' THEN
m.value
ELSE
0
END) iombs_ps,
SUM(CASE
WHEN a.internal_metric_name = 'iorequests_ps' THEN
m.value
ELSE
0
END) iorequests_ps
FROM v$alert_types a, v$threshold_types t, v$sysmetric m
WHERE a.internal_metric_category = 'instance_throughput'
AND a.reason_id = t.alert_reason_id
AND t.metrics_id = m.metric_id
AND m.group_id = 2
AND m.end_time <= SYSDATE
GROUP BY m.end_time
ORDER BY m.end_time ASC
/

使用方法:

SQL> @metric
TO_CHAR(FROM_TZ(CAS TRANSACTIONS_PS PHYSREADS_PS PHYSREADS_PT PHYSWRITES_PS
------------------- --------------- ------------ ------------ -------------
PHYSWRITES_PT PHYSREADSDIR_PS PHYSREADSDIR_PT PHYSWRITESDIR_PS PHYSWRITESDIR_PT
------------- --------------- --------------- ---------------- ----------------
PHYSREADSLOB_PS PHYSREADSLOB_PT PHYSWRITESLOB_PS PHYSWRITESLOB_PT REDOSIZE_PS
--------------- --------------- ---------------- ---------------- -----------
LOGONS_PS  LOGONS_PT OPNCURS_PS OPNCURS_PT COMMITS_PS COMMITS_PT ROLLBACKS_PS
---------- ---------- ---------- ---------- ---------- ---------- ------------
ROLLBACKS_PT USERCALLS_PS USERCALLS_PT RECURSCALLS_PS RECURSCALLS_PT LOGREADS_PS
------------ ------------ ------------ -------------- -------------- -----------
LOGREADS_PT DBWRCHECKPOINTS_PS BGCHECKPOINTS_PS REDOWRITES_PS REDOWRITES_PT
----------- ------------------ ---------------- ------------- -------------
TABSCANSLONG_PS TABSCANSLONG_PT TABSCANSTOTAL_PS TABSCANSTOTAL_PT
--------------- --------------- ---------------- ----------------
INDXSCANSFULL_PT INDXSCANSFULL_PS INDXSCANSTOTAL_PS INDXSCANSTOTAL_PT  PARSES_PS
---------------- ---------------- ----------------- ----------------- ----------
PARSES_PT HARDPARSES_PS HARDPARSES_PT FAILEDPARSES_PS FAILEDPARSES_PT
---------- ------------- ------------- --------------- ---------------
EXECUTIONS_PS SORTSDISK_PS SORTSDISK_PT ROWS_PSORT EXECUTESWOPARSE_PCT
------------- ------------ ------------ ---------- -------------------
SOFTPARSE_PCT USERCALL_PCT NETWORKBYTES_PS ENQTIMEOUTS_PS ENQTIMEOUTS_PT
------------- ------------ --------------- -------------- --------------
ENQWAITS_PS ENQWAITS_PT ENQDEADLOCKS_PS ENQDEADLOCKS_PT ENQREQS_PS ENQREQS_PT
----------- ----------- --------------- --------------- ---------- ----------
DBBLKGETS_PS DBBLKGETS_PT CONSISTENTREADGETS_PS CONSISTENTREADGETS_PT
------------ ------------ --------------------- ---------------------
DBBLKCHANGES_PS DBBLKCHANGES_PT CONSISTENTREADCHANGES_PS
--------------- --------------- ------------------------
CONSISTENTREADCHANGES_PT  CRBLKS_PS  CRBLKS_PT CRUNDORECS_PT
------------------------ ---------- ---------- -------------
USERROLLBACKUNDOREC_PS USERROLLBACKUNDOREC_PT LEAFNODESPLITS_PS
---------------------- ---------------------- -----------------
LEAFNODESPLITS_PT BRANCHNODESPLITS_PS BRANCHNODESPLITS_PT REDOSIZE_PT
----------------- ------------------- ------------------- -----------
CRUNDORECS_PS  DBTIME_PS AVG_ACTIVE_SESSIONS AVG_BLOCK_READ_LATENCY   IOMBS_PS
------------- ---------- ------------------- ---------------------- ----------
IOREQUESTS_PS
-------------
2011-10-27 20:02:23      .349533955   2.69640479   7.71428571    .116511318
.333333333      .199733688      .571428571       .116511318       .333333333
.116511318      .333333333       .116511318       .333333333  16212.0506
.016644474 .047619048 19.0745672 54.5714286 .349533955        100            0
0   .349533955            1     81.1917443     232.285714   130.54261
373.47619                  0                0    .515978695    1.47619048
0               0       .216378162       .619047619
0                0        19.4573901        55.6666667 4.92676431
14.0952381    1.89747004    5.42857143               0               0
20.4560586            0            0  22.026087          75.9153784
61.4864865    .42865891      995.838881              0              0
0           0               0               0  8.9713715 25.6666667
44.9400799   128.571429              85.60253            244.904762
45.1398136      129.142857               .249667111
.714285714          0          0             0
0                      0        .199733688
.571428571                   0                   0  46381.9048
0 .023586884          .000235869             .032960413 3.09587217
195.489348
PS Per Second
PT Per Transaction

[repost]Oracle RDBMS:Generic Large Object (LOB) Performance Guidelines

Oracle的Giri Mandalika给我们介绍了LOB大型对象的一些调优注意事项,之前我一直对_shared_io_pool_size这个undocumented参数不甚了解,以为它是一个shared pool相关的参数,根本原因是甚至没有任何一个公开的Mos Note介绍了这几个隐藏参数。

而这篇文章给出了比较好的解释,这里引用一下:

This blog post is generic in nature and based on my recent experience with a content management system where securefile BLOBs are critical in storing and retrieving the checked in content. It is stro ngly suggested to check the official documentation in addition to these brief guidelines. In general, Oracle Database SecureFiles and Large Objects Developer’s Guide 11g Release 2 (11.2) is a good starting point when creating tables involving SecureFiles and LOBs.

Guidelines

  • Tablespace: create the LOB in a different tablespace isolated from the rest of the database
  • Block size: consider larger block size (default 8 KB) if the expected size of the LOB is big
  • Chunk size: consider larger chunk size (default 8 KB) if larger LOBs are expected to be stored and retrieved
  • Inline or Out-of-line: choose “DISABLE STORAGE IN ROW” (out-of-line) if the average LOB size is expected to be > 4 KB. The default inlining is fine for smaller LOBs
  • CACHE or NOCACHE: consider bypassing the database buffer cache (NOCACHE) if large number of LOBs are stored and not expected to be retrieved frequently
  • COMPRESS or NOCOMPRESS: choose COMPRESS option if storage capacity is a concern and a constraint. It saves disk space at the expense of some performance overhead. In a RAC database environment, it is recommended to compress the LOBs to reduce the interconnect traffic
  • De-duplication: by default, duplicate LOBs are stored as a separate copy in the database. Choosing DEDUPLICATE option enables sharing the same data blocks for similar files thus reducing storage overhead and simplifying storage management
  • Partitioning: consider partitioning the parent table to maximize application performance. Hash partitioning is one of the options if there is no potential partition key in the table
  • Zero-Copy I/O protocol: turned on by default. Turning it off in a RAC database environment could be beneficial. Set the initialization parameter _use_zero_copy_io=FALSE to turn o ff the Zero-Copy I/O protocol
  • Shared I/O pool: database uses the shared I/O pool to perform large I/O operations on securefile LOBs. The shared I/O pool uses shared memory segments. If this pool is not large enough or if there is not enough memory available in this pool for a securefile LOB I/O operation, Oracle uses a portion of PGA until there is sufficient memory available in the shared I/O pool. Hence it is recommen ded to size the shared I/O pool appropriately by monitoring the database during the peak activity. Relevant initialization parameters: _shared_io_pool_size and _shared_iop_max_size

利用Procexp工具监控Windows平台上的Oracle数据库性能

我们可以从http://technet.microsoft.com/en-us/sysinternals下载到Windows平台上的系统内部调试工具包,这些工具中大部分是由Mark Russinovich编写的,其中最为著名的tools包括进程管探测器(Process Explorer)、Regmon等。

这里我们要介绍的是使用Procexp工具监控Windows平台上的Oracle性能信息。Procexp是一套功能齐全的进程信息管理工具,它使用图形界面显示(GUI),可以把它看做是Windows平台上taskmgr.exe任务管理器的扩展,事实上它完全足以代替taskmgr,前提是用户需要有一定的OS基础。

我们来看看Procexp针对运行在Windows上的Oracle(一种不太推荐的组合)时所能监控的信息:

1.进程属性

  • 包括进程的性能数据,包括CPU、Virtual Memory、Physical Memory、I/O、Handles
  • 查看详细的线程信息(包括个别线程的CPU使用率)
  • 查看线程堆栈(thread stack)
  • Kill/suspend thread

2.系统全局信息

  • 提供系统级别的性能数据

3.创建进程的DUMP文件

  • 创建FULL或minidump转储以便诊断BUG

4.识别文件句柄(Handle)或动态链接库(DDL)

  • 可以用来判断进程锁住了那些DDL文件或普通文件句柄

当我们在Windows上成功启动Oracle实例后就可以通过Procexp.exe工具来监控数据库性能了,使用十分简便,选中”Oracle.exe”进程之后右键菜单Properties即可浏览进程的属性:

procexp_monitor_oracle1

选中Performance面板后可以浏览进程的性能数据,这就像是Windows上的nmon命令,注意要使用管理员身份运行Procexp.exe,否则可能性能数据无法正确收集而显示N/A:

procexp_monitor_oracle2

选中Performance Graphy面板浏览图形化的性能趋势图:

procexp_monitor_oracle3

 

点击Thread面板我们可以浏览Oracle.exe进程下的线程信息,Windows平台上的一点不便就是无法通过线程信息直接判断该线程是哪个”后台进程”或”服务进程”,需要配合v$process视图才能做到。

SQL> select spid ,program from v$process;
SPID                     PROGRAM
------------------------ --------------------
PSEUDO
3124                     ORACLE.EXE (PMON)
4328                     ORACLE.EXE (VKTM)
5096                     ORACLE.EXE (GEN0)
2840                     ORACLE.EXE (DIAG)
2068                     ORACLE.EXE (DBRM)
2464                     ORACLE.EXE (PSP0)
4468                     ORACLE.EXE (DIA0)
120                      ORACLE.EXE (MMAN)
4424                     ORACLE.EXE (DBW0)
1312                     ORACLE.EXE (LGWR)
684                      ORACLE.EXE (CKPT)
5684                     ORACLE.EXE (SMON)
1016                     ORACLE.EXE (RECO)
4516                     ORACLE.EXE (MMON)
1108                     ORACLE.EXE (MMNL)
6108                     ORACLE.EXE (NSS2)
2728                     ORACLE.EXE (SHAD)
18 rows selected.

以上SPID=3124即指TID为3124的线程为PMON”后台进程”,在Thread面板上能够直接了解到某个线程的CPU使用率,这在我们诊断Oracle.exe进程有过高的CPU使用率时可以方便定位;点击stack按钮可以调出该线程当前的调用栈,这在我们确定BUG的时候很有用。

procexp_monitor_oracle4

 

同时在以上Thread面板上还可以使用Kill/Suspend按钮来杀死或停止某个异常线程(前提是我们确认所要杀死的线程是非关键后台的non-critical background thread),在Windows平台上这原本是需要使用orakill命令来完成的。

TCP/IP面板可以为我们提供简要的进程网络信息,包括Local Address和Remote Address,如果要获取更完整的信息可以配合其他网络监控工具(如工具包中的TCPView):

procexp_monitor_oracle5

 

另一个十分有用的功能是Environment面板,该面板用以显示详细的环境变量信息,如:Path,TEMP,ORACLE_SID,CLASSPATH等等,在诊断一些本地登录问题或实例异常问题时十分有效:

procexp_monitor_oracle6

Procexp工具也能像taskmgr那样监控系统级的性能信息,而且更为详细,点击主面板上的View -> System Information:

procexp_monitor_oracle7

 

如上文所述View DDL/Handle功能可以帮助我们了解Oracle进程所调用的动态链接库文件(DDL)和所持有的文件句柄(Handle)信息,因为Windows平台上某个被打开的文件时无法被同时修改或移动的,这在我们维护过程中可能造成许多麻烦, 而又因为win平台上没有如lsof,fuser这样的工具,所以我们在诊断Oracle软件的某些文件锁定问题时可以借助于该功能。

procexp_monitor_oracle8

如上图所示Oracle.exe持有”\Device\NamedPipe\*oraspawn_pipe*.4284“等多个文件的句柄。

procexp_monitor_oracle9

如上图所示Oracle.exe加载了多个ora开头的DDL,因为Windows平台上的特殊性,Oracle软件大量使用DDL库来替换在Unix平台上编译在Oracle 2进制镜像中的指令,这样方便了升级(直接替换DDL文件就可以了,无需编译,这也导致Windows平台上PSU/CPU补丁发布的特殊性),可以注意到这些DDL文件还标有Version信息,大多为11.02.0000.0001,编译时间为2010/2/10 9:01。

在较新版本的Procexp工具中还加入了create dump功能,以完善该工具的诊断能力。针对Oracle实例的异常现象和Bug可以创建进程转储信息,以便提交给Oracle Support分析问题,一般来说你并不需要亲自分析dump文件,这是一项高级功能,不要对正常运行着的生产数据库使用这一终极手段。

procexp_monitor_oracle10

总结

如果你还在抱怨Windows平台上为什么没有一个如Unix平台上NMON功能强大的监控软件的话,那么Procexp会是一个非常杰出的选择,另一点需要感恩的是这是一款免费软件,访问该软件的Homepage,可以让你了解更多的有用信息。

Oracle备份恢复:Rman Backup缓慢问题一例

近日客户报多套10g的数据库在使用NBU磁带备份时出现RMAN FULL BACKUP十分缓慢的问题,这些数据库中最大一个的达到2.61T,该数据库在一个月前地全库0级备份耗时在3-4个小时,而在最近猛涨到17个小时。客户之前已经向Symantec提交了服务请求,但暂时没有得到结论。希望我们从Oracle角度分析该备份速度变慢问题。

我们首先分析了备份信息的动态视图V$rman_backup_job_details:

OUTPUT_DEVICE INPUT_TYPE ELAPSED_SECONDS INPUT_BYTES_DISPLAY INPUT_BYTES_PER_SEC OUTPUT_BYTES_PER_SEC
17 SBT_TAPE DB INCR 62078 2.61T 44.08M 18.10M

以上可以确认在对2.61T大小的数据库执行全库磁带备份时耗费了62078s,这里还显示了backup时每秒的读取IO为44M,每秒的写出IO为18M;这里不能因为简单的output io per second<input io per second,而断论写出能力存在瓶颈;备份时对数据文件的读取和写出backup piece到备份介质上的操作是一个整体,CPU、Input IO、Output IO任何一环都可能成为备份的瓶颈;譬如因为对数据文件的读取IO存在瓶颈,那么相应的写出IO也会慢下来;又譬如当RMAN与备份服务器之间的IO带宽存在瓶颈,那么相应的读取IO也会不得不慢下来。具体是哪一个环节出现了问题,我们需要求助于其他的RMAN动态性能视图,如:

V$BACKUP_SYNC_IO
Displays rows when the I/O is synchronous to the process (or thread on some platforms) performing the backup.

V$BACKUP_ASYNC_IO
Displays rows when the I/O is asynchronous to the process (or thread on some platforms) performing the backup.

以上2个视图的区别在于一个汇聚了使用同步IO执行RMAN备份恢复操作的性能信息,而另一个是异步IO的。

因为客户使用默认配置disk_async_io为true,故而我们首先关注input IO的性能信息,这些信息存放在V$backup_async_io视图中;而对于磁带设备未启用slaves IO模拟的异步IO(tape_asynch_io=true但是backup_tape_io_slaves为默认的false),所以与磁带设备相关的output IO的性能信息存放在v$backup_sync_io视图中。

DEVICE OPEN_TIME ELAPSED BYTES/S IO_COUNT READY long_waits LONG_WAIT_TIME_TOTAL LONG_WAIT_TIME_MAX
DISK 4/4 388900 372827681 2765564 2074114 90028 231181 53
DISK 4/5 753900 192323498 2765564 2074114 90028 178548 41
DISK 4/6 369000 392934106 2765564 2074114 90028 243507 41
DISK 4/7 405100 357918255 2765564 2074114 90028 268117 73
DISK 4/8 347900 416765407 2765564 2074114 90028 183543 77
DISK 4/9 395800 366328159 2765564 2074114 90028 255399 48
DISK 4/10 428400 338451646 2765564 2074114 90028 268544 45
DISK 4/11 413200 350901949 2765564 2074114 90028 269857 42
DISK 4/12 735400 197161661 2765564 2074114 90028 169016 34
DISK 4/13 410000 353640696 2765564 2074114 90028 283607 60
DISK 4/14 408300 355113116 2765564 2074114 90028 279012 38
DISK 4/15 442700 327519054 2765564 2074114 90028 308744 605
DISK 4/16 393000 368938130 2765564 2074114 90028 251509 205
DISK 4/17 423100 342691291 2765564 2074114 90028 273868 42
DISK 4/18 375600 386029513 2765564 2074114 90028 230859 328
DISK 4/19 721200 201043657 2765564 2074114 90028 191753 162
DISK 4/20 401000 361577769 2765564 2074114 90028 272852 147
DISK 4/21 346600 418328578 2765564 2074114 90028 209569 109
DISK 4/22 400500 362029177 2765564 2074114 90028 265060 112
DISK 4/23 402400 360319794 2765564 2074114 90028 259008 594
DISK 4/24 449600 322492627 2765564 2074114 90028 274202 64
DISK 4/25 413900 350308493 2765564 2074114 90028 269380 230
DISK 4/26 748600 193685126 2765564 2074114 90028 177477 105
DISK 4/27 389900 371871468 2765564 2074114 90028 261200 38
DISK 4/28 383800 377781879 2765564 2074114 90028 241870 158
DISK 4/29 403700 359159488 2765564 2074114 90028 266135 212
DISK 4/30 390600 371205031 2765564 2074114 90028 248161 340
DISK 5/1 463600 312753851 2765564 2074114 90028 271247 39
DISK 5/2 419900 345302894 2765564 2074114 90028 255042 117
DISK 5/3 705700 205459381 2765564 2074114 90028 173043 189
DISK 5/4 418400 346540835 2765564 2074114 90028 291614 47
DISK 5/5 357400 405687424 2765564 2074114 90028 222676 188
DISK 5/6 421400 344073767 2765564 2074114 90028 285860 95
DISK 5/7 405100 357918255 2765564 2074114 90028 263761 38
DISK 5/8 381500 380059463 2765564 2074114 90028 203510 210
DISK 5/9 918400 157875311 2765564 2074114 90028 221293 37
DISK 5/10 3378600 42915020 2765564 2074114 90028 142211 36
DISK 5/11 559900 258961753 2765564 2074114 90028 252911 174
DISK 5/12 622500 232919976 2765564 2074114 90028 241495 40
DISK 5/13 626700 231359000 2765564 2074114 90028 237973 41
DISK 5/14 802000 180788884 2765564 2074114 90028 231283 42
DISK 5/15 601200 241172131 2765564 2074114 90028 209418 40
DISK 5/16 1387800 104476643 2765564 2074114 90028 211878 36

这里我们关心的几个时间指标包括:ELAPSED(Input IO的总耗时)、LONG_WAIT_TIME_TOTAL(长IO的总等待时间)、LONG_WAIT_TIME_MAX(最长一次的IO等待时间)的单位均为百分之一秒,该视图的具体定义参考这里
从以上chart中(由于列宽的原因只截取了部分数据)我们可以看到从4/4到5/16之间备份的Input IO总耗时(elapsed)虽然间歇性的暴涨到过33786s,但其他IO指标:IO总数、READY IO总数、LONG IO WAIT的次数、LONG IO WAIT的总耗时、最长LONG IO WAIT均没有出现大的变化;基本可以判定在备份期间对数据文件的读取不存在瓶颈,为了进一步诊断需要分析备份输出的IO性能状况:

DEVICE date ELAPSED BYTES BYTES/S IO_COUNT IO_TIME_TOTAL IO_TIME_MAX DISCRETE_BYTES_PER_SECOND
SBT_TAPE 4/5 754900 584663433216 77449123 2230314 440365 2600 132767916
SBT_TAPE 4/5 703900 553432907776 78623797 2111179 381135 5800 145206530
SBT_TAPE 4/12 736400 588200542208 79875142 2243807 433298 3400 135749655
SBT_TAPE 4/12 692300 556839731200 80433299 2124175 369237 2600 150808216
SBT_TAPE 4/19 722200 591873179648 81954193 2257817 395904 3400 149499166
SBT_TAPE 4/19 829000 561043210240 67677106 2140210 510746 2801 109847793
SBT_TAPE 4/26 749600 596010598400 79510485 2273600 435940 2600 136718493
SBT_TAPE 4/26 700300 565171191808 80704154 2155957 377019 2800 149905228
SBT_TAPE 5/3 706800 600177377280 84914739 2289495 396965 5800 151191510
SBT_TAPE 5/3 712500 569155518464 79881476 2171156 392324 5800 145072827
SBT_TAPE 5/10 3379700 604452159488 17884787 2305802 3093781 2802 19537652
SBT_TAPE 5/10 2798400 573396746240 20490164 2187335 2524296 2801 22715115
SBT_TAPE 5/17 428095307776 1633054 2216291 5800 19315844

可以从以上chart中可以发现到5/3为止的output io总耗时还处于合理范围内(为7068+7125)s,约为4小时。而到了5/10时输出IO的总耗时达到了(33797+29784)s,约为17.6小时。研究其他IO统计信息可以发现5/10的IO_TIME_TOTAL总数为(30937+25242)s,IO_TIME_TOTAL代表了某个IO等待的总耗时,单位为百分之一秒。从另一个IO性能指标DISCRETE_BYTES_PER_SECOND来看,在5/10备份文件的平均传输率急剧下降。

综合上述现象,诱发客户环境中的RMAN全库备份缓慢的主要原因是备份文件输出IO性能在一段时间内急剧下降,瓶颈应当存在于RMAN与NBU备份服务器之间,与数据库读取性能没有关系。给客户的建议是复查数据库服务器到NBU备份服务器间的网络带宽是否正常,NBU服务器的性能是否正常,磁带库的写出性能是否正常。

这个case后续经过对NBU的复查发现原因是虚拟磁带库VTL的存储电池过期,导致读写数据时直接从磁盘上进行,而不经过缓存,故影响了数据库全备份的速度。由于VTL存储电池过期信息需要从串口直接访问其存储才能确认问题,所以在问题发生之初无法从VTL的管理界面中获取该信息。

这个case告诉我们不仅是raid卡的冲放电、存储的ups电池过期会引发严重的IO性能问题,VTL的存储电池过期也可能给数据库备份造成麻烦,要让系统7*24时刻正常运行,有太多指标需要我们去关注,良好的规范很重要,同时很多时候我们不得不和硬件打交道。

一些我们需要知道的时间指标

  • L1 cache reference 0.5 ns
  • Branch mispredict 5 ns
  • L2 cache reference 7 ns
  • Mutex lock/unlock 100 ns
  • Main memory reference 100 ns
  • Compress 1K bytes with Zippy 10,000 ns
  • Send 2K bytes over 1 Gbps network 20,000 ns
  • Read 1 MB sequentially from memory 250,000 ns
  • Round trip within same datacenter 500,000 ns
  • Disk seek 10,000,000 ns
  • Read 1 MB sequentially from network 10,000,000 ns
  • Read 1 MB sequentially from disk 30,000,000 ns
  • Send packet CA->Netherlands->CA 150,000,000 ns

Reference:http://surana.wordpress.com/2009/01/01/numbers-everyone-should-know/

常用工具收集页面

DBA在性能调优或诊断过程中多少会使用一些成品工具,以下列出一些我工作中使用较为频繁的工具:

Program platform download URL
nmon AIX POWER http://www.ibm.com/developerworks/wikis/download/attachments/53871937/nmon4aix12e.zip?version=1
nmon Linux http://nmon.sourceforge.net/docs/MPG_nmon_for_Linux_14a_binaries.zip
sarmon Solaris http://sourceforge.net/projects/sarmon/files/
putty Interl x86 http://the.earth.li/~sgtatham/putty/latest/x86/putty.exe
OS watcher Non Windows https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=REFERENCE&id=301137.1
Procwatcher Non Windows https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=BULLETIN&id=459694.1
SQLTXPLAIN ALL https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=TROUBLESHOOTING&id=215187.1
TRCANLZR ALL https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=TROUBLESHOOTING&id=224270.1
STRMMON Non Windows https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=BULLETIN&id=290605.1
Oracle Cluster Verification Utility ALL http://www.oracle.com/technetwork/database/clustering/downloads/cvu-download-homepage-099973.html
Oracle Cluster Health Monitor (CHM) Linux and Windows http://www.oracle.com/technetwork/database/clustering/downloads/ipd-download-homepage-087212.html
RDA 4 ALL https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=DIAGNOSTIC%20TOOLS&id=250262.1
Latest Opatch ALL https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=BULLETIN&id=224346.1
RAC Diagnostic Information (racdiag.sql) ALL https://www.askmaclean.com/archives/script-to-collect-rac-diagnostic-information-racdiag-sql.html
ass.awk ALL https://www.askmaclean.com/archives/oracle-systemstate-dump-analytic-tool-ass-awk-v1-09.html
LTOM ALL https://support.oracle.com/CSP/main/article?cmd=show&type=NOT&doctype=DIAGNOSTIC%20TOOLS&id=352363.1

今后还会不断更新!

Oracle数据库升级前必要的准备工作

Oracle数据库升级向来是一门纷繁复杂的工程,DBA需要为产品数据库的升级耗费大量时间精力在准备工作上;因为其升级复杂度高,所以即便做了较为充分的准备仍可能在升级过程中遇到意想不到的问题,为了更高效地完成升级任务和减少停机时间,我们有必要为升级工作营造一种”舒适的”防御式的数据库”氛围”:

1.为了保障升级后的数据库性能,我们有必要在升级前有效地收集数据库的性能统计信息,以便升级后若发生性能问题可以做出对比:

  • 为了保证性能统计信息真实有效,有必要在数据库升级前的一个月即开展收集工作
  • 收集的性能统计信息应当尽可能的精确真实
  • 在Oracle 8i/9i中使用Statspack性能报表,将快照级别设置为6或更高,设置快照间隔为30分钟,在具体升级前将perfstat用户使用exp工具导出,参考Metalink文档Note:466350.1介绍了若何对比升级前后的Statspack快照
  • 在Oracle 10g/11g中使用AWR自动负载仓库性能报告,保证采集30天左右的快照,快照间隔最好为30-60分钟;之后可以使用dbms_swrf_internal.awr_extract存储过程将AWR导出到dumpfile文件,在升级完成后载入这部分AWR信息,并可以使用DBMS_WORKLOAD_REPOSITORY.AWR_DIFF_REPORT_HTML函数对比升级前后的性能

2.正式升级前的防御性措施:

  • 过多的审计信息可能会导致升级速度下降,可以在升级前将审计数据导出,并清理审计字典基表:
截断SYS.AUD$基表:
SQL>TRUNCATE TABLE SYS.AUD$;
  • 同样的有必要清理10g后出现的回收站:
清理DBA回收站:
SQL>purge DBA_RECYCLEBIN;
  • 移除一些”过期”的参数,设置这些参数的原因很有可能是为了修正原版本上的一些问题,例如我们都会做的设置event参数;但在新版本中这些参数是否仍有必要设置是一个值得讨论的问题,当然你完全可以就此事去提交一个SR:
这些"过期"参数可能包括:过老的如optimizer_features_enable=8.1.7.4,_always_semi_join=off,_unnest_subquery=false
或者event = "10061 trace name context forever, level 10",如此之类等等。
  • 为数据库中的数据字典收集统计信息:
在Oracle 9i中可以执行以下过程收集数据字典统计信息,
SQL> exec DBMS_STATS.GATHER_SCHEMA_STATS
('SYS', options => 'GATHER',estimate_percent =>
DBMS_STATS.AUTO_SAMPLE_SIZE, method_opt => 'FOR
ALL COLUMNS SIZE AUTO', cascade => TRUE);
在Oracle10g/11g中收集字典统计信息可以由GATHER_DICTIONARY_STATS存储过程来完成:
SQL> exec DBMS_STATS.GATHER_DICTIONARY_STATS;
  • 为策万全,我们有必要为回退数据库升级任务做好准备,10g以前只能通过备份恢复来完成,10g以后我们可以利用闪回数据库的还原点特性来回退数据库,但需要注意以下几点:
    • 利用还原点要求数据库处于归档且打开flashback database的模式下
    • 在特性仅在版本10.2之后可用
    • 必须保证闪回回复区flashback recovery area有足够的磁盘空间
    • 注意在升级后不要立即修改compatible参数,restore point无法跨越compatible工作
/* 首先我们在正式升级前创建一个有效的保证闪回数据库的还原点 */
SQL> create restore point pre11gupgrd guarantee flashback database;
Restore point created.
/* 确认以上4个注意后,我们可以大胆放心地实施升级工作了 */
SQL> shutdown immediate;
..............
SQL> @?/rdbms/admin/catupgrd.sql
.............
upgrade failed
/* 在升级过程中出现了不可绕过的错误时,我们可能不得不回退数据库到还原点,也就是升级前*/
/* 关闭实例后,还原环境到10g下 */
SQL> startup mount;
/* 正式闪回到还原点pre11gupgrd */
SQL> flashback database to restore point pre11gupgrd;
Flashback complete.
SQL> alter database open;
alter database open
*
ERROR at line 1:
ORA-01589: must use RESETLOGS or NORESETLOGS option for database open
SQL>  alter database open resetlogs;
/* 以resetlogs打开数据库 */
/* 之后有必要删除这一个还原点 */
SQL> select * from v$restore_point;
SCN DATABASE_INCARNATION# GUA STORAGE_SIZE
---------- --------------------- --- ------------
TIME
---------------------------------------------------------------------------
NAME
--------------------------------------------------------------------------------
5081633                     3 YES     15941632
08-FEB-11 08.20.33.000000000 PM
PRE11GUPGRD
SQL> drop restore point pre11gupgrd;
Restore point dropped.
  • 下载最新版本的预升级检查脚本(pre-upgrade check script),如utlu102i.sql / utlu111i.sql / utlu112i.sql;Metalink文档Note:884522.1 <How to Download and Run Oracle’s Database Pre-Upgrade Utility> 指出了各版本utluxxx脚本的下载地址
/* 将升级信息spool到日志文件中 */
SQL> SPOOL /tmp/UPGRADE/utlu112i.log
SQL> @/tmp/UPGRADE/utlu112i.sql
  • 需要关注SYS和SYSTEM用户模式下的失效对象,有必要在升级前修复所有的失效对象:
SELECT UNIQUE object_name, object_type, owner
FROM dba_objects
WHERE status = 'INVALID';
  • 在升级完成后推荐执行utlrp.sql脚本以重新编译(Recompile)对象,从11.1.0.7开始升级前后的失效对象将自动对比,执行?/rdbms/admin/utluiobj.sql脚本可以列出对比信息,同时基表registry$sys_inv_objs和registry$nonsys_inv_objs分别列出了数据库中失效的sys或非sys对象:
SQL> select * from v$version;
BANNER
--------------------------------------------------------------------------------
Oracle Database 11g Enterprise Edition Release 11.2.0.1.0 - 64bit Production
PL/SQL Release 11.2.0.1.0 - Production
CORE    11.2.0.1.0      Production
TNS for Linux: Version 11.2.0.1.0 - Production
NLSRTL Version 11.2.0.1.0 - Production
SQL> @?/rdbms/admin/utluiobj.sql
.
Oracle Database 11.1 Post-Upgrade Invalid Objects Tool 02-08-2011 22:23:22
.
This tool lists post-upgrade invalid objects that were not invalid
prior to upgrade (it ignores pre-existing pre-upgrade invalid objects).
.
Owner                     Object Name                     Object Type
.
SH            FWEEK_PSCAT_SALES_MV               MATERIALIZED VIEW
PL/SQL procedure successfully completed.

3.解决升级过程中失效的组件(component)

  • 确保该部分组件确实被link到目前的Oracle软件2进制可执行文件或库文件中
  • 如果确认不会用到某些组件(component),想要通过手动彻底移除这部分组件(亦或者希望reinstall重新安装这部分组件),那么可以参考以下文档:
Note:472937.1 Information On Installed Database Components/Schemas
Note.300056.1 Debug and Validate Invalid Objects
Note:753041.1 How to diagnose Components with NON VALID status
Note.733667.1 How to Determine if XDB is Being Used in the Database?
组件升级失败实例1:数据库从10.2升级到11.2,在10g的环境中Database Vault组件已经安装,
Database Vault组件在升级relink前被turned off,在升级到11.2的过程中XDB组件升级失败;
其原因在于安装或切换Database Vault将使得XDB组件失效,或者由Bug 8942758引起。
解决方案是在升级前执行utlrp.sql脚本重新编译失效对象和组件,在此例中执行utlrp.sql可以使XDB组件valid.
组件升级失败实例2:数据库从10.2.0.4升级到11.1.0.7,在升级过程中"ORACLE SERVER"组件失效;
其原因在于DMBS_SQLPA包引用了某个不存在的列,该问题可以参考metalink文档782735.1和Notes:605317.1/736353.1。
有效的解决方案是:
1.在升级前将SYS.PLAN_TABLE$基表或者同义词PUBLIC.PLAN_TABLE DROP掉
2.若已执行了升级操作并遭遇了该问题,那么可以使用以下手段修复该问题:
@catplan.sql -- recreate the plan table
@dbmsxpln.sql -- reload dbms_xplan spec
@prvtxpln.plb -- reload dbms_xplan implementation
@prvtspao.plb -- reload dbms_sqlpa
alter package SYS.DBMS_SUMADVISOR compile ;
alter package SYS.DBMS_SUMADVISOR compile body;

4. 使用例如AIX上的slibclean等命令清理操作系统环境,在少数专有平台上不清理载入的共享库文件可能导致升级失败

5.在执行catupgrd.sql脚本正式升级前打开sqlplus的echo输出,将升级过程中所有的输出信息转储到日志文件中:

SQL> set echo on
SQL> SPOOL /tmp/upgrade.log
SQL> @catupgrd.sql
SQL> spool off

DBUA图形化升级工具默认使用spool和”echo”输出,这些日志可以在$ORACLE_HOME/cfgtoollogs/dbua//upgrade/目录下找到。

Script:AWR Trending

Kapil Goyal在他的IOUG presentation中共享了几个很有用的AWR性能诊断脚本,在这里共享一下:

 

 

Select extract(day from snap_interval) * 24 * 60 +
extract(hour from snap_interval) * 60 +
extract(minute from snap_interval) "Snapshot Interval",
extract(day from retention) * 24 * 60 +
extract(hour from retention) * 60 + extract(minute from retention) "Retention Interval(Minutes) ",
extract(day from retention) "Retention(in Days) "
from dba_hist_wr_control;

 

 

System Event Trending

 

event_response.sql
alter session set nls_date_format='dd-mon-yy';
set lines 150 pages 100 echo off feedback off
col date_time heading 'Date time|mm/dd/yy_hh_mi_hh_mi' for a30
col event_name for a26
col waits for 99,999,999,999 heading 'Waits'
col time for 99,999 heading 'Total Wait|Time(sec)'
col avg_wait_ms for 99,999 heading 'Avg Wait|(ms)'
prompt "Enter the date in DD-Mon-YY Format:"
WITH system_event AS
(select sn.begin_interval_time begin_interval_time,
sn.end_interval_time end_interval_time,
se.event_name event_name,
se.total_waits e_total_waits,
lag(se.total_waits, 1) over(order by se.snap_id) b_total_waits,
se.total_timeouts e_total_timeouts,
lag(se.total_timeouts, 1) over(order by se.snap_id) b_total_timeouts,
se.time_waited_micro e_time_waited_micro,
lag(se.time_waited_micro, 1) over(order by se.snap_id) b_time_waited_micro
from dba_hist_system_event se, dba_hist_snapshot sn
where trunc(sn.begin_interval_time) = '&Date'
and se.snap_id = sn.snap_id
and se.dbid = sn.dbid
and se.instance_number = sn.instance_number
and se.dbid = (select dbid from v$database)
and se.instance_number = (select instance_number from v$instance)
and se.event_name = '&event_name') select to_char
(se1.BEGIN_INTERVAL_TIME, 'mm/dd/yy_hh24_mi') || to_char
(se1.END_INTERVAL_TIME, '_hh24_mi') date_time,
se1.event_name,
se1.e_total_waits - nvl(se1.b_total_waits,
0) waits,
(se1.e_time_waited_micro - nvl(se1.b_time_waited_micro,
0)) / 1000000 time,
((se1.e_time_waited_micro - nvl(se1.b_time_waited_micro,
0)) / 1000) / (se1.e_total_waits - nvl(se1.b_total_waits,
0)) avg_wait_ms from system_event se1 where(se1.e_total_waits - nvl(se1.b_total_waits,
0)) > 0 and nvl(se1.b_total_waits,
0) > 0
/

 

 

Load Profile Trending

 

alter session set nls_date_format='dd-mon-yy';
set lines 160 pages 1000 echo off feedback off
col stat_name for a25
col date_time for a40
col BEGIN_INTERVAL_TIME for a20
col END_INTERVAL_TIME for a20
prompt "Enter the date in DD-Mon-YY Format and Stats you want to trend like 'redo size','physical reads','physical writes','session logical reads' etc."
WITH sysstat AS
(select sn.begin_interval_time begin_interval_time,
sn.end_interval_time end_interval_time,
ss.stat_name stat_name,
ss.value e_value,
lag(ss.value, 1) over(order by ss.snap_id) b_value
from dba_hist_sysstat ss, dba_hist_snapshot sn
where trunc(sn.begin_interval_time) = '&Date'
and ss.snap_id = sn.snap_id
and ss.dbid = sn.dbid
and ss.instance_number = sn.instance_number
and ss.dbid = (select dbid from v$database)
and ss.instance_number = (select instance_number from v$instance)
and ss.stat_name = '&stat_name')
select to_char(BEGIN_INTERVAL_TIME, 'mm/dd/yy_hh24_mi') || to_char(END_INTERVAL_TIME, '_hh24_mi') date_time,
stat_name,
round((e_value - nvl(b_value,0)) / (extract(day from(end_interval_time - begin_interval_time)) * 24 * 60 * 60
+ extract(hour from(end_interval_time - begin_interval_time)) * 60 * 60
+ extract(minute from(end_interval_time - begin_interval_time)) * 60 + extract(second from(end_interval_time - begin_interval_time))),0) per_sec
from sysstat where(e_value - nvl(b_value,0)) > 0 and nvl(b_value,0) > 0
/

 

 

Time Model Statistics Trend

 

 

alter session set nls_date_format='dd-mon-yy';
set lines 160 pages 1000
col date_time heading 'Date time' for a40
col stat_name heading 'Statistics Name' for a25
col time heading 'Time (s)' for 99,999,999,999
prompt "Enter the date in DD-Mon-YY Format and Stats you want to trend like 'DBtime', 'DB CPU', 'sql execute elapsed time', 'PL/SQL execution elapsed time','parse time elapsed', 'background elapsed time'"
WITH systimemodel AS
(select sn.begin_interval_time begin_interval_time,
sn.end_interval_time end_interval_time,
st.stat_name stat_name,
st.value e_value,
lag(st.value, 1) over(order by st.snap_id) b_value
from DBA_HIST_SYS_TIME_MODEL st, dba_hist_snapshot sn
where trunc(sn.begin_interval_time) = '&Date'
and st.snap_id = sn.snap_id
and st.dbid = sn.dbid
and st.instance_number = sn.instance_number
and st.dbid = (select dbid from v$database)
and st.instance_number = (select instance_number from v$instance)
and st.stat_name = '&stat_name')
select to_char (BEGIN_INTERVAL_TIME, 'mm/dd/yy_hh24_mi') || to_char (END_INTERVAL_TIME, '_hh24_mi') date_time,
stat_name, round((e_value - nvl(b_value, 0)) / 1000000) time
from systimemodel
where(e_value - nvl(b_value, 0)) > 0 and nvl(b_value, 0) > 0
/

 

 

Want to Know if Execution Plan Changed Recently?

 

 

set lines 150 pages 150
col BEGIN_INTERVAL_TIME for a23
col PLAN_HASH_VALUE for 9999999999
col date_time for a30
col snap_id heading 'SnapId'
col executions_delta heading "No. of exec"
col sql_profile heading "SQL|Profile" for a7
col date_time heading 'Date time'
col avg_lio heading 'LIO/exec' for 99999999999.99
col avg_cputime heading 'CPUTIM/exec' for 9999999.99
col avg_etime heading 'ETIME/exec' for 9999999.99
col avg_pio heading 'PIO/exec' for 9999999.99
col avg_row heading 'ROWs/exec' for 9999999.99
SELECT distinct
s.snap_id ,
PLAN_HASH_VALUE,
to_char(s.BEGIN_INTERVAL_TIME,'mm/dd/yy_hh24mi')|| to_char(s.END_INTERVAL_TIME,'_hh24mi') Date_Time,
SQL.executions_delta,
SQL.buffer_gets_delta/decode(nvl(SQL.executions_delta,0),0,1,SQL.executions_delta) avg_lio,
--SQL.ccwait_delta,
(SQL.cpu_time_delta/1000000)/decode(nvl(SQL.executions_delta,0),0,1,SQL.executions_delta) avg_cputime ,
(SQL.elapsed_time_delta/1000000)/decode(nvl(SQL.executions_delta,0),0,1,SQL.executions_delta) avg_etime,
SQL.DISK_READS_DELTA/decode(nvl(SQL.executions_delta,0),0,1,SQL.executions_delta) avg_pio,
SQL.rows_processed_total/decode(nvl(SQL.executions_delta,0),0,1,SQL.executions_delta) avg_row
--,SQL.sql_profile
FROM
dba_hist_sqlstat SQL,
dba_hist_snapshot s
WHERE
SQL.instance_number =(select instance_number from v$instance)
and SQL.dbid =(select dbid from v$database)
and s.snap_id = SQL.snap_id
AND sql_id in
('&SQLID') order by s.snap_id
/

 

 

xp_awr.sql

 

 

select plan_table_output from table (dbms_xplan.display_awr('&sql_id',null,null,
'ADVANCED +PEEKED_BINDS'));

Linode vps磁盘速度实测

事实证明Linode无愧于众多业界人士对其的推崇,今天实测了一下其磁盘速度真的不俗:

[root@li229-25 ~]# hdparm -tT /dev/xvda
/dev/xvda:
Timing cached reads:   25536 MB in  1.99 seconds = 12843.60 MB/sec
Timing buffered disk reads:  340 MB in  3.00 seconds = 113.20 MB/sec
[root@li229-25 ~]# dd if=/dev/xvda of=/root/dump bs=1024k count=1000
1000+0 records in
1000+0 records out
1048576000 bytes (1.0 GB) copied, 18.9223 seconds, 55.4 MB/s
以上为Linode vps的成绩,dd的速度为55MB/s
一下为笔者的台式机电脑,使用普通的西数硬盘
[root@rh2 ~]# cat /proc/scsi/scsi 
Attached devices:
Host: scsi0 Channel: 00 Id: 00 Lun: 00
Vendor: ATA      Model: WDC WD3200AAJS-0 Rev: 01.0
Type:   Direct-Access                    ANSI SCSI revision: 05
[root@rh2 ~]# hdparm -Tt /dev/sda
/dev/sda:
Timing cached reads:   9132 MB in  2.00 seconds = 4569.83 MB/sec
Timing buffered disk reads:  306 MB in  3.01 seconds = 101.72 MB/sec
[root@rh2 ~]# dd if=/dev/sda of=/root/dump bs=1024k count=1000  
1000+0 records in
1000+0 records out
1048576000 bytes (1.0 GB) copied, 22.6009 seconds, 46.4 MB/s
Linode虚拟服务器的磁盘速度略优于普通pc的磁盘速度,作为vps性能还是不错的;
如果作为网页服务器的话,结合memcached等缓存技术,一般来说IO性能不会成为主要瓶颈。

Dropping Very Large Table In Oracle

这是一张550G的大表,表上还包括了CLOB和BLOB对象;我们来观察下Oracle drop这样一个大表时的具体表现:

SQL> select * from v$version;
BANNER
----------------------------------------------------------------
Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi
PL/SQL Release 10.2.0.4.0 - Production
CORE    10.2.0.4.0      Production
TNS for Linux: Version 10.2.0.4.0 - Production
NLSRTL Version 10.2.0.4.0 - Production
SQL> show parameter recyclebin
NAME                                 TYPE        VALUE
------------------------------------ ----------- ------------------------------
recyclebin                           string      OFF
/* 为了避免被flashback table骚扰,关闭了recyclebin回收站功能 */
SQL> conn maclean/maclean
Connected.
SQL> col segment_name for a20
SQL> select segment_name,bytes/1024/1024/1024 "size in Gbytes"  from user_segments where segment_name='TV';
SEGMENT_NAME         size in Gbytes
-------------------- --------------
TV                           547.25
SQL> select NUM_ROWS,BLOCKS from user_tables where table_name='TV';
NUM_ROWS     BLOCKS
---------- ----------
859150100   65649786
SQL> desc tv;
Name                                      Null?    Type
----------------------------------------- -------- ----------------------------
OWNER                                              VARCHAR2(30)
OBJECT_NAME                                        VARCHAR2(128)
SUBOBJECT_NAME                                     VARCHAR2(30)
OBJECT_ID                                          NUMBER
DATA_OBJECT_ID                                     NUMBER
OBJECT_TYPE                                        VARCHAR2(19)
CREATED                                            DATE
LAST_DDL_TIME                                      DATE
TIMESTAMP                                          VARCHAR2(19)
STATUS                                             VARCHAR2(7)
TEMPORARY                                          VARCHAR2(1)
GENERATED                                          VARCHAR2(1)
SECONDARY                                          VARCHAR2(1)
SPARE1                                             CLOB
SPARE2                                             CLOB
SPARE3                                             CLOB
SPARE4                                             BLOB
/* 该大表包含CLOB、BLOB2种大对象,共859150100行数据,占用65649786个块
其所在是一个大文件表空间(bigfile tablespace),本地区间管理方式,区间大小统一为128MB 
*/
SQL> col tablespace_name for a2
SQL> select relative_fno,header_block,owner,tablespace_name from dba_segments where segment_name='TV';
RELATIVE_FNO HEADER_BLOCK OWNER                          TA
------------ ------------ ------------------------------ --
1024           82 MACLEAN                        BF
/* 因为是用bigfile tablespace技术,故数据段所在相对数据文件号为1024 */
SQL> col segment_name for a30
SQL> col owner for a10
SQL> select owner,segment_name,segment_type,header_block from dba_segments where relative_fno=1024;
OWNER      SEGMENT_NAME                   SEGMENT_TYPE       HEADER_BLOCK
---------- ------------------------------ ------------------ ------------
MACLEAN    TV                             TABLE                        82
MACLEAN    SYS_IL0000057409C00014$$       LOBINDEX                  32850
MACLEAN    SYS_IL0000057409C00015$$       LOBINDEX                  65618
MACLEAN    SYS_IL0000057409C00016$$       LOBINDEX                  98386
MACLEAN    SYS_IL0000057409C00017$$       LOBINDEX                 131154
MACLEAN    SYS_LOB0000057409C00014$$      LOBSEGMENT                16466
MACLEAN    SYS_LOB0000057409C00015$$      LOBSEGMENT                49234
MACLEAN    SYS_LOB0000057409C00016$$      LOBSEGMENT                82002
MACLEAN    SYS_LOB0000057409C00017$$      LOBSEGMENT               114770
9 rows selected.
/* 该bigfile数据文件1024上的数据段如上包括TV表本身以及LOB对象和LOB索引 */
SQL> select current_scn from v$database;
CURRENT_SCN
-----------
44989856
/* 获取当前scn以便闪回数据库 */
SQL> alter session set events '10046 trace name context forever,level 12';
Session altered.
SQL> set timing on;
SQL> drop table tv;
Table dropped.
Elapsed: 00:00:01.21  
/* 虽然是550G的大表,但drop也仅耗时1.21s再次证明了drop仅仅是修改数据字典 */
通过tkprof分析的trace文件信息:
drop table tv
call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.12       0.29         28          9      30163           0
Fetch        0      0.00       0.00          0          0          0           0
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        2      0.12       0.29         28          9      30163           0
Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 64
Elapsed times include waiting on following events:
Event waited on                             Times   Max. Wait  Total Waited
----------------------------------------   Waited  ----------  ------------
db file sequential read                        28        0.00          0.00
rdbms ipc reply                                10        0.01          0.06
reliable message                                8        0.00          0.00
enq: RO - fast object reuse                     8        0.00          0.00
write complete waits                            9        0.04          0.10
log file sync                                   1        0.00          0.00
SQL*Net message to client                       1        0.00          0.00
SQL*Net message from client                     1        7.22          7.22   

以上可以看到少量的等待事件,drop要求所被drop对象先做object对象级别的checkpoint检查点,以便将脏块写出;故而会出现Write complete waits(A user wants to modify a block that is part of DBWRÂ’s current write batch. When DBWR grabs buffers to write, it marks them as ‘being written’. All the collected buffers are then written to disk. The wait ‘write complete waits’ implies we wanted a buffer while this flag was set. The flags are cleared as each buffer is written)。同时要求获取enq: RO – fast object reuse(快速重用对象队列锁)。

整个drop流程还包括以下修改数据字典的操作:

delete from object_usage
where obj# in (select a.obj#
from object_usage a, ind$ b
where a.obj# = b.obj#
and b.bo# = :1)
/*  删除该表在对象使用情况基表(object_usage)中的纪录 */
delete from sys.cache_stats_1$ where dataobj# = :1
/* Jonathan Lewis的CBO fundamentals对该基本做了寥寥几笔的描述,该基表用于监控缓存统计信息  
--which is used to monitor lifetime caching statistics 
*/
delete com$ where obj# = :1
/* 删除对象相关的comment,com$ --comment table */
delete from hist_head$ where obj# = :1
delete from histgrm$ where obj# = :1
delete from dependency$ where d_obj# = :1
delete from source$ where obj# = :1
delete from idl_ub1$
where obj# = :1
and part = :2
delete from idl_char$
where obj# = :1
and part = :2
delete from idl_ub2$
where obj# = :1
and part = :2
delete from ncomp_dll$ where obj# = :1 returning dllname into :2
delete from idl_sb4$
where obj# = :1
and part = :2
delete from objauth$ where obj# = :1
delete from col$ where obj# = :1
delete from icol$ where bo# = :1
delete from icoldep$ where obj# in (select obj# from ind$ where bo# = :1)
delete from jijoin$
where obj# in (select obj#
from jijoin$
where tab1obj# = :1
or tab2obj# = :1)
delete from jirefreshsql$
where iobj# in (select iobj# from jirefreshsql$ where tobj# = :1)
delete from ccol$ where obj# = :1
delete from ind$ where bo# = :1
delete from cdef$ where obj# = :1
delete from tab$ where obj# = :1
delete coltype$ where obj# = :1
delete from subcoltype$ where obj# = :1
delete ntab$ where obj# = :1
delete lob$ where obj# = :1
delete refcon$ where obj# = :1
delete from opqtype$ where obj# = :1

之后将出现多次update seg$(数据段基表)的操作:

update seg$ set type#=:4,blocks=:5,extents=:6,minexts=:7,maxexts=:8,extsize=
:9,extpct=:10,user#=:11,iniexts=:12,lists=decode(:13, 65535, NULL, :13),
groups=decode(:14, 65535, NULL, :14), cachehint=:15, hwmincr=:16, spare1=
DECODE(:17,0,NULL,:17),scanhint=:18 
/* 通过将trace文件中的bind variable还原可以看到Oracle实际执行的update语句 */
update seg$
set type#     = 3,
blocks    = 16384,
extents   = 1,
minexts   = 1,
maxexts   = 2147483645,
extsize   = 16384,
extpct    = 0,
user#     = 64,
iniexts   = 16384,
lists     = decode(0, 65535, NULL, :13),
groups    = decode(0, 6 5535, NULL, :14),
cachehint = :0,
hwmincr   = 57411,
spare1    = DECODE(164161, 0, NULL, :17),
scanhint  = 0
where ts# = 12
and file# = 1024
and block# = 82
/*  以上update语句被多次执行,每次变化的仅有block#变量,依次为114770,82002,49234...82,
与上述的1024RFN数据文件上的各数据段的header_block头块对应 
*/
/*  由此可知drop过程中Oracle所需要做的是对段在seg$基表上的纪录做修改,
将type由原值修改为3,也就是临时段的标记;同时extents重置为1。
*/
sql.bsq文件纪录了seg$基表上type#列的含义:
/*  1 = UNDO, 2 = SAVE UNDO, 3 = TEMPORARY, 4 = CACHE, 5 = DATA, 6 = INDEX 
7 = SORT  8 = LOB   9 = Space Header 10 = System Managed Undo      */
delete from obj$ where obj# = :1
/*  每次update完成后,都会伴随有以上删除obj$基表中对应对象纪录的语句 */
delete from seg$ where ts#=:1 and file#=:2 and block#=:3
/* 若关闭了recyclebin(回收站,flashback table闪回表特性),则在最后此前台进程还会删除已经
被置为type#=3的临时段在seg$字典基表上的纪录;如果打开了回收站则不会有此delete from seg$操作;
*/
SQL> select block#,type#,extents from sys.seg$  where file#=1024;
no rows selected

可以看到drop数据表作为一种DDL语句,其所需要完成的主要工作是完整删除数据字典中该表相关信息,并在seg$字典基表上将原有段类型(type)和大小修改为临时段和仅有1个区间,之后在obj$对象字典基表上将该对象的纪录彻底删除,此外还将释放数据文件头的区间位图信息。

SQL> alter system flush buffer_cache;
System altered.
SQL> alter system dump datafile '/g01/bf.dbf' block 3;
System altered.
File Space Bitmap Block:
BitMap Control:
RelFno: 1024, BeginBlock: 17, Flag: 0, First: 0, Free: 63488
0000000000000000 0000000000000000 0000000000000000 0000000000000000

沪ICP备14014813号

沪公网安备 31010802001379号