DATABASE(oracleDB 11g)/Tuning

[SQL Tuning]옵티마이저 통계

SEUNGSAMI 2019. 5. 20. 13:56
옵티마이저 통계




옵티마이저 통계
  • 옵티마이저가 참고하는 통계
  • 시스템 통계 (I/O 속도, CPU 속도 등 하드웨어적 성능)
      • 시스템 통계는 sys가 소유하고 있다.(sys.aux_stats$)
      • 인스턴스가 기동될때 기본적인 통계 정보가 수집이 된다.
  • 객체통계
      • 테이블 통계
      • 컬럼 통계
      • 인덱스 통계
          • user_indexes(blevel, lead_blocks, clustering_factor)
      • 수집방법
          • 자동 : auto task (22:00 - 04:00)
          • 수동 : DBMS_STAT
          • Dynamic Sampling : 옵티마이저가 스스로 진행 


클러스터링 팩터(cf)
  • 클러스터링 이라는 것은 군집되어있다는 말.
  • 인덱스 컬럼의 순서대로 데이터블럭에 행이 정렬된 정도
  • cf 가 blocks에 수렴하면 가장 좋다.
  • cf 가 num_rows에 근접하면 가장 나쁘다.
USER01@orcl2> select table_name, index_name, blevel, leaf_blocks, clustering_factor
              from user_indexes
              where table_name in ('EMPLOYEES');

TABLE_NAME      INDEX_NAME                         BLEVEL LEAF_BLOCKS CLUSTERING_FACTOR
--------------- ------------------------------ ---------- ----------- -----------------
EMPLOYEES       EMPL_DEPTNO_IX                          0           1                 3
EMPLOYEES       EMPL_MGR_IX                             0           1                 7
EMPLOYEES       EMPL_SAL_IX                             0           1                17
EMPLOYEES       EMPL_EMAIL_IX                           0           1                19
EMPLOYEES       EMPL_JOB_IX                             0           1                 3
EMPLOYEES       EMPL_NAME_IX                            0           1                19
EMPLOYEES       EMPL_EMPID_IX                           0           1                 3
-- EMP의 데이터는 107건 하지만, 현재 EMPL_EMPID_IX의 클러스터링 팩터는 3이므로 굉장히 잘 되어있다고 할 수 있다.
-- 하지만 EMPL_NAME_IX의 경우 19개 정도로 많으므로, 클러스터링 팩터가 잘 되어있지않다.(클러스터에 분산되어있다)


USER01@orcl2> select blocks from user_tables where table_name in ('EMPLOYEES');

    BLOCKS
----------
         5




<<추가 실습>>
clustering factor : index를 scan 하는 동안 access하게 되는 테이블의 block의 개수


USER01@orcl2> create table c_table   
              nologging
              as select * from all_objects
              order by object_id;
Table created.


USER01@orcl2> create index c_obj_idx on c_table(object_id);
Index created.


USER01@orcl2> create index c_obj_name_idx on c_table(object_name);
Index created.


USER01@orcl2> execute dbms_stats.gather_table_stats('user01','c_table')
PL/SQL procedure successfully completed.


USER01@orcl2> select num_rows, blocks, avg_row_len from user_tables where table_name = 'C_TABLE';

  NUM_ROWS     BLOCKS AVG_ROW_LEN
---------- ---------- -----------
     71754       1048          97


USER01@orcl2> select index_name, blevel, leaf_blocks, clustering_factor from user_indexes where table_name = 'C_TABLE';

INDEX_NAME                         BLEVEL LEAF_BLOCKS CLUSTERING_FACTOR
------------------------------ ---------- ----------- -----------------
C_OBJ_NAME_IDX                          2         355             57412
C_OBJ_IDX                               1         159              1022


USER01@orcl2> select /*+ gather_plan_statistics index(c c_obj_idx) */ count(*) from c_table c where object_id >=0 and object_name >= ' ';

  COUNT(*)
----------
     71754


USER01@orcl2> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  8q1jhwwkmkdcp, child number 0
-------------------------------------
select /*+ gather_plan_statistics index(c c_obj_idx) */ count(*) from
c_table c where object_id >=0 and object_name >= ' '

Plan hash value: 1213134236

----------------------------------------------------------------------------------------------------
| Id  | Operation                    | Name      | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
----------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |           |      1 |        |      1 |00:00:00.56 |    1182 |
|   1 |  SORT AGGREGATE              |           |      1 |      1 |      1 |00:00:00.56 |    1182 |
|*  2 |   TABLE ACCESS BY INDEX ROWID| C_TABLE   |      1 |  71754 |  71754 |00:00:00.47 |    1182 |
|*  3 |    INDEX RANGE SCAN          | C_OBJ_IDX |      1 |  71754 |  71754 |00:00:00.09 |     160 |
----------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter("OBJECT_NAME">=' ')
   3 - access("OBJECT_ID">=0)

=> 1182 - 160 = 1022
160 = index range scan 한 수


USER01@orcl2> select /*+ gather_plan_statistics index(c c_obj_name_idx) */ count(*) from c_table c where object_id >=0 and object_name >= ' ';

  COUNT(*)
----------
     71754


USER01@orcl2> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  13v7n4yhw1u35, child number 0
-------------------------------------
select /*+ gather_plan_statistics index(c c_obj_name_idx) */ count(*)
from c_table c where object_id >=0 and object_name >= ' '

Plan hash value: 586128974

---------------------------------------------------------------------------------------------------------
| Id  | Operation                    | Name           | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |                |      1 |        |      1 |00:00:00.59 |   57769 |
|   1 |  SORT AGGREGATE              |                |      1 |      1 |      1 |00:00:00.59 |   57769 |
|*  2 |   TABLE ACCESS BY INDEX ROWID| C_TABLE        |      1 |  71754 |  71754 |00:00:00.51 |   57769 |
|*  3 |    INDEX RANGE SCAN          | C_OBJ_NAME_IDX |      1 |  71754 |  71754 |00:00:00.10 |     357 |
---------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter("OBJECT_ID">=0)
   3 - access("OBJECT_NAME">=' ')

=> 57769 - 357 = 57412



구문분석
  • syntax check, semantics check(object 확인, 권한 확인)
  • optimizer 
      • query transformer
      • cost estimator(코스트 예측기) : 다음의 주요정보+@를 가지고 계산
          • selectivity : 예측 비율(%)
              • num_distinct의 갯수가 10이면 1/10 => 0.1
          • cardinality : 예측 건수
              • num_rows의 갯수가 100이면 1/10 * 100 = 10개의 행
          • cost = (IO time + CPU time) / SBI avg time
              • IO time = single block IO(Index scan한 것) time + multi block IO(Full scan한 것) time
              • CPU time = filtering, function, 몇 개 컬럼 선택..
      • plan generator
      • row source generator


히스토그램
  • 컬럼 값의 분포도 통계, 데이터가 불균등 분포일 때 필요.
  • frequency - 빈도 히스토그램(도수분포표)
  • hight balanced - 높이로 균형을 맞춘 히스토그램
  • 실행계획의 e_rows가 a_rows에 근접하게 바뀐다
USER01@orcl2> select column_name, num_distinct, num_buckets
              from user_tab_col_statistics
              where table_name = 'EMPLOYEES';
              
USER01@orcl2> col column_name for a15
              col endpoint_actual_value for a15
              select *
              from user_histograms
              where table_name = 'EMPLOYEES'
              and column_name = 'DEPARTMENT_ID'
              order by endpoint_number;
              
TABLE_NAME      COLUMN_NAME     ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL
--------------- --------------- --------------- -------------- ---------------
EMPLOYEES       DEPARTMENT_ID                 1             10
EMPLOYEES       DEPARTMENT_ID                 3             20
EMPLOYEES       DEPARTMENT_ID                 9             30
EMPLOYEES       DEPARTMENT_ID                10             40
EMPLOYEES       DEPARTMENT_ID                55             50
EMPLOYEES       DEPARTMENT_ID                60             60
EMPLOYEES       DEPARTMENT_ID                61             70
EMPLOYEES       DEPARTMENT_ID                95             80
EMPLOYEES       DEPARTMENT_ID                98             90
EMPLOYEES       DEPARTMENT_ID               104            100
EMPLOYEES       DEPARTMENT_ID               106            110


======================
히스토그램 생성
USER01@orcl2> exec dbms_stats.gather_table_stats(ownname=>'USER01', tabname=>'EMPLOYEES', method_opt=>'for columns size auto DEPARTMENT_ID');
===> 11개 행의 frequeny 히스토그램이 생성
======================
USER01@orcl2> SELECT COUNT(*)
              FROM mcustsum t
              WHERE salegb = 'A'
              AND salemm BETWEEN '200801' AND '200812' ;


USER01@orcl2> @xplan

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  g2jmadq0w1ngv, child number 0
-------------------------------------
SELECT COUNT(*)      FROM mcustsum t      WHERE salegb = 'A'      AND
salemm BETWEEN '200801' AND '200812'

Plan hash value: 3197995868

------------------------------------------------------------------------------------------------------
| Id  | Operation             | Name      | Starts | E-Rows | A-Rows |   A-Time   | Buffers | Reads  |
------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |           |      1 |        |      1 |00:00:00.37 |    2579 |   2569 |
|   1 |  SORT AGGREGATE       |           |      1 |      1 |      1 |00:00:00.37 |    2579 |   2569 |
|*  2 |   INDEX FAST FULL SCAN| MCUST_X01 |      1 |    459K|  76571 |00:00:00.28 |    2579 |   2569 |
------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter(("SALEGB"='A' AND "SALEMM">='200801' AND "SALEMM"<='200812'))
-- 옵티마이저가 예측을 잘못해서, fast full scan이 발생함


USER01@orcl2> exec dbms_stats.gather_table_stats(ownname=>'USER01', tabname=>'MCUSTSUM', method_opt=>'for columns size auto salegb');

USER01@orcl2> SELECT COUNT(*)
              FROM mcustsum t
              WHERE salegb = 'A'
              AND salemm BETWEEN '200801' AND '200812' ;

USER01@orcl2> @xplan

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  g2jmadq0w1ngv, child number 1
-------------------------------------
SELECT COUNT(*)      FROM mcustsum t      WHERE salegb = 'A'      AND
salemm BETWEEN '200801' AND '200812'

Plan hash value: 3197995868

---------------------------------------------------------------------------------------------
| Id  | Operation             | Name      | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |           |      1 |        |      1 |00:00:00.23 |    2579 |
|   1 |  SORT AGGREGATE       |           |      1 |      1 |      1 |00:00:00.23 |    2579 |
|*  2 |   INDEX FAST FULL SCAN| MCUST_X01 |      1 |  73807 |  76571 |00:00:00.14 |    2579 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter(("SALEGB"='A' AND "SALEMM">='200801' AND "SALEMM"<='200812'))

Note
-----
   - cardinality feedback used for this statement
-- 낮아졌으나, 여전히 ffs를 진행한다.


USER01@orcl2> @idx
Enter value for tab_name: mcustsum

INDEX_NAME                     INDEX_TYPE      UNIQUENESS      COLUMNS
------------------------------ --------------- --------------- ------------------------------
MCUST_X01                      NORMAL          NONUNIQUE       SALEMM+SALEGB

-- 인덱스가 재대로 안되어있다.


USER01@orcl2> create index sales_new on mcustsum(salegb, salemm);


USER01@orcl2> SELECT COUNT(*)
              FROM mcustsum t
              WHERE salegb = 'A'
              AND salemm BETWEEN '200801' AND '200812' ;


USER01@orcl2> @xplan

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  g2jmadq0w1ngv, child number 1
-------------------------------------
SELECT COUNT(*)      FROM mcustsum t      WHERE salegb = 'A'      AND
salemm BETWEEN '200801' AND '200812'

Plan hash value: 229773664

--------------------------------------------------------------------------------------------------
| Id  | Operation         | Name      | Starts | E-Rows | A-Rows |   A-Time   | Buffers | Reads  |
--------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |           |      1 |        |      1 |00:00:00.19 |     216 |    215 |
|   1 |  SORT AGGREGATE   |           |      1 |      1 |      1 |00:00:00.19 |     216 |    215 |
|*  2 |   INDEX RANGE SCAN| SALES_NEW |      1 |  73807 |  76571 |00:00:00.10 |     216 |    215 |
--------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("SALEGB"='A' AND "SALEMM">='200801' AND "SALEMM"<='200812')
-- 이제는 RANGE SCAN 이 발생한다.



다중 열 통계
  • 각각의 컬럼이 아닌 두개 이상의 컬럼의 조합으로 통계를 만든다.



시스템 통계
  • sys.aux_stats$
  • exec dbms_stats.gather_system_stats(gathering_mode=>'INTERVAL', interval=>5);
      • INTERVAL은 지정된 간격 동안 시스템 작업을 캡처하는 것을 말한다.
USER01@orcl2> col pname for a15
USER01@orcl2> col pval2 for a18
USER01@orcl2> select * from sys.aux_stats$;

SNAME                          PNAME                PVAL1 PVAL2
------------------------------ --------------- ---------- ------------------
SYSSTATS_INFO                  STATUS                     COMPLETED
SYSSTATS_INFO                  DSTART                     08-13-2009 23:35
SYSSTATS_INFO                  DSTOP                      08-13-2009 23:35
SYSSTATS_INFO                  FLAGS                    1
SYSSTATS_MAIN                  CPUSPEEDNW      2696.05568
SYSSTATS_MAIN                  IOSEEKTIM               10
SYSSTATS_MAIN                  IOTFRSPEED            4096
SYSSTATS_MAIN                  SREADTIM
SYSSTATS_MAIN                  MREADTIM
SYSSTATS_MAIN                  CPUSPEED
SYSSTATS_MAIN                  MBRC
SYSSTATS_MAIN                  MAXTHR
SYSSTATS_MAIN                  SLAVETHR
13 rows selected.


USER01@orcl2> exec dbms_stats.gather_system_stats(gathering_mode=>'INTERVAL', interval=>5);
PL/SQL procedure successfully completed.


USER01@orcl2> select * from sys.aux_stats$;

SNAME                          PNAME                PVAL1 PVAL2
------------------------------ --------------- ---------- ------------------
SYSSTATS_INFO                  STATUS                     AUTOGATHERING
SYSSTATS_INFO                  DSTART                     05-15-2019 16:24
SYSSTATS_INFO                  DSTOP                      05-15-2019 16:29
SYSSTATS_INFO                  FLAGS                    0
SYSSTATS_MAIN                  CPUSPEEDNW      2696.05568
SYSSTATS_MAIN                  IOSEEKTIM               10
SYSSTATS_MAIN                  IOTFRSPEED            4096
SYSSTATS_MAIN                  SREADTIM
SYSSTATS_MAIN                  MREADTIM
SYSSTATS_MAIN                  CPUSPEED
SYSSTATS_MAIN                  MBRC
SYSSTATS_MAIN                  MAXTHR
SYSSTATS_MAIN                  SLAVETHR
SYSSTATS_TEMP                  SBLKRDS               9455
SYSSTATS_TEMP                  SBLKRDTIM         64237480
SYSSTATS_TEMP                  MBLKRDS               1046
SYSSTATS_TEMP                  MBLKRDTIM         10207290
SYSSTATS_TEMP                  CPUCYCLES           230076
SYSSTATS_TEMP                  CPUTIM               65648
SYSSTATS_TEMP                  JOB                      1
SYSSTATS_TEMP                  CACHE_JOB                2
SYSSTATS_TEMP                  MBRTOTAL             22096

22 rows selected.


=========================5분뒤=========================
USER01@orcl2> select * from sys.aux_stats$;

SNAME                          PNAME                PVAL1 PVAL2
------------------------------ --------------- ---------- ------------------
SYSSTATS_INFO                  STATUS                     COMPLETED
SYSSTATS_INFO                  DSTART                     05-15-2019 16:24
SYSSTATS_INFO                  DSTOP                      05-15-2019 16:29
SYSSTATS_INFO                  FLAGS                    0
SYSSTATS_MAIN                  CPUSPEEDNW      2696.05568
SYSSTATS_MAIN                  IOSEEKTIM               10
SYSSTATS_MAIN                  IOTFRSPEED            4096
SYSSTATS_MAIN                  SREADTIM          3258.621
SYSSTATS_MAIN                  MREADTIM
SYSSTATS_MAIN                  CPUSPEED              3476
SYSSTATS_MAIN                  MBRC
SYSSTATS_MAIN                  MAXTHR
SYSSTATS_MAIN                  SLAVETHR

13 rows selected.
-- 들어간 것을 확인



통계 환경 설정
USER01@orcl2> exec dbms_stats.set_table_prefs('SH', 'SALES', 'stale_percent', '13');


USER01@orcl2> col preference_value for a15
USER01@orcl2> col owner for a13


USER01@orcl2> select * from dba_tab_stat_prefs;

OWNER         TABLE_NAME      PREFERENCE_NAME                PREFERENCE_VALU
------------- --------------- ------------------------------ ---------------
SH            SALES           STALE_PERCENT                  13

USER01@orcl2> exec dbms_stats.delete_table_prefs('SH','SALES','stale_percent');
-- 삭제


USER01@orcl2>
dbms_stats.gather_database_stats
dbms_stats.gather_schema_stats('HR')
dbms_stats.gather_table_stats('HR','EMPLOYEES')


USER01@orcl2> select dbms_stats.get_prefs('stale_percent')
              from dual;
              
DBMS_STATS.GET_PREFS('STALE_PERCENT')
----------------------------------------------------------
10


USER01@orcl2> col stale_percent for a15
USER01@orcl2> col cascade for a25
USER01@orcl2> col method_opt for a25
USER01@orcl2> select dbms_stats.get_prefs('stale_percent') stale_percent,
              dbms_stats.get_prefs('cascade') cascade,
              dbms_stats.get_prefs('method_opt') method_opt
              from dual;

STALE_PERC CASCADE                   METHOD_OPT
---------- ------------------------- -------------------------
10         DBMS_STATS.AUTO_CASCADE   FOR ALL COLUMNS SIZE AUTO




통계 Lock
  • 자동 수집 방지
  • 주로 휘발성 테이블에 사용

USER01@orcl2> exec dbms_stats.gather_table_stats('USER01','SALES');
PL/SQL procedure successfully completed.


USER01@orcl2> select table_name, stattype_locked
              from dba_tab_statistics
              where owner = 'USER01' and table_name = 'SALES';
              
TABLE_NAME                     STATT
------------------------------ -----
SALES


USER01@orcl2> exec dbms_stats.lock_table_stats('USER01', 'SALES');
PL/SQL procedure successfully completed.
-- 새로운 통계 수집을 못하게 lock을 걸어 놓는다.


USER01@orcl2> select table_name, stattype_locked
              from dba_tab_statistics
              where owner = 'USER01' and table_name = 'SALES';

TABLE_NAME                     STATT
------------------------------ -----
SALES                          ALL


USER01@orcl2> exec dbms_stats.gather_table_stats('USER01','SALES');
ORA-20005: object statistics are locked (stattype = ALL)
ORA-06512: at "SYS.DBMS_STATS", line 20337
ORA-06512: at "SYS.DBMS_STATS", line 20360
ORA-06512: at line 1


USER01@orcl2> exec dbms_stats.unlock_table_stats('USER01', 'SALES');
-- lock을 해제한다.


USER01@orcl2> exec dbms_stats.gather_table_stats('USER01','SALES');


USER01@orcl2> select table_name, stattype_locked
              from dba_tab_statistics
              where owner = 'USER01' and table_name = 'SALES';

TABLE_NAME                     STATT
------------------------------ -----
SALES                          



=================
통계 RESTORE(복원)
=================
USER01@orcl2> col owner for a10
USER01@orcl2> col table_name for a10
USER01@orcl2> col partition_name for a15
USER01@orcl2> col subpartition_name for a18
USER01@orcl2> col stats_update_time for a35
USER01@orcl2> select * from dba_tab_stats_history where table_name='SALES' and owner = 'USER01';
-- 통계 정보가 저장


OWNER      TABLE_NAME PARTITION_NAME  SUBPARTITION_NAME  STATS_UPDATE_TIME
---------- ---------- --------------- ------------------ -----------------------------------
USER01     SALES                                         16-MAY-19 12.04.47.312931 PM +09:00
USER01     SALES                                         16-MAY-19 12.09.20.256921 PM +09:00
USER01     SALES                                         16-MAY-19 12.09.36.244678 PM +09:00
USER01     SALES                                         16-MAY-19 12.00.34.919117 PM +09:00
USER01     SALES                                         01-MAY-19 04.21.13.137501 PM +09:00
USER01     SALES                                         01-MAY-19 04.21.18.656272 PM +09:00

6 rows selected.


USER01@orcl2> exec dbms_stats.restore_table_stats(ownname=>'USER01', tabname=>'SALES', as_of_timestamp=>'16-MAY-19 12.00.34.919117 PM +09:00')
PL/SQL procedure successfully completed.