Bootstrap

Oracle-通过ROWID方式从坏块表里面获取数据

前言:

当表出现坏块时,我们可以通过备份或者容灾环境对数据进行修复。但如果没有备份,容灾环境也无效,导致无法对数据进行修复,这时候我们就需要从出现坏块的表里面获取正常数据,最大限度地避免数据损失。获取数据常见的方法有两种:一是通过DBMS_REPAIR对表坏块进行标记,从而跳过坏块数据获取正常数据(具体操作可以参见之前的文章《Oracle-通过DBMS_REPAIR处理表坏块》);二是通过ROWID方式从坏块表里面获取数据。本文接下来主要讲述如何通过ROWID方式从坏块表里面获取数据。

 

通过ROWID方式获取坏块表数据的两种方式:

1 从表所在的索引里面获取rowid,如果坏块表里面的索引没有出现坏块,那么我们可以从索引里面获取数据的rowid,通过rowid去扫描坏块表获取正常的数据。

2 如果表没有索引或者索引损坏,那么我们可以通过dba_extents里面的信息去构造表的rowid,再通过rowid去扫描坏块表获取正常的数据。

 

通过索引获取ROWID方式获取坏块表数据:

对表进行扫描,出现了ORA-01578坏块报错

SQL> select /*+full(test_tab) */ count(*) from test.test_tab;
​
select /*+full(test_tab) */ count(*) from test.test_tab
                            *
ERROR at line 1:
ORA-01578: ORACLE data block corrupted (file # 7, block # 39644)
ORA-01110: data file 7: '/u01/app/oracle/oradata/ORCL/users01.dbf'

通过索引进行扫描,没有出现报错,说明索引正常

-- The index_ffs hint requests a fast full scan of index SYS_C0011103;
-- the query returns only rowids, ordered by rowid.
SELECT /*+ index_ffs(tab1 SYS_C0011103) parallel(tab1) */
       rowid
FROM test.test_tab tab1
WHERE id IS NOT NULL
ORDER BY rowid;

创建新表用于存放从坏块里面扫描到的正常数据

-- Empty table with the same columns as TEST.TEST_TAB; the always-false
-- predicate means no rows are copied.
CREATE TABLE test.test_tab_normal AS
SELECT *
FROM test.test_tab
WHERE 1 = 2;

创建错误表用于存放坏块信息

-- Log table: one row per rowid that could not be read, plus the error text.
CREATE TABLE test.bad_rows (
    row_id            ROWID,
    oracle_error_code VARCHAR2(1000)
);

执行以下PL/SQL代码,通过索引获取rowid的方式扫描坏块表(脚本来自MOS文档1527738.1)

set serveroutput on
-- Salvage readable rows from TEST.TEST_TAB by visiting every rowid found in
-- index SYS_C0011103.
--   * rows that can be read are copied into TEST.TEST_TAB_NORMAL
--   * rowids that fail with ORA-01410 / ORA-08103 / ORA-01578 are logged in
--     TEST.BAD_ROWS together with the error number
-- Work is committed after each batch of rowids.
DECLARE
  TYPE RowIDTab IS TABLE OF ROWID INDEX BY BINARY_INTEGER;
  -- Rowids are taken from a fast full scan of the index (index_ffs hint).
  CURSOR c1 IS
    SELECT /*+ index_ffs(tab1 SYS_C0011103) parallel(tab1) */ rowid
    FROM test.test_tab tab1
    WHERE id IS NOT NULL
    ORDER BY rowid;
  r          RowIDTab;
  rows       NATURAL := 20000;   -- number of rowids fetched per batch
  bad_rows   NUMBER := 0;        -- running count of logged rowids
  errors     NUMBER;
  error_code NUMBER;
  myrowid    ROWID;
  -- ORA-24381 is the error FORALL ... SAVE EXCEPTIONS raises when at least one
  -- iteration failed. Catching only this error (instead of WHEN OTHERS) lets
  -- any other exception propagate instead of being handled as if
  -- SQL%BULK_EXCEPTIONS described it.
  bulk_errors EXCEPTION;
  PRAGMA EXCEPTION_INIT(bulk_errors, -24381);
BEGIN
  OPEN c1;
  LOOP
    FETCH c1 BULK COLLECT INTO r LIMIT rows;
    EXIT WHEN r.COUNT = 0;
    BEGIN
      -- Copy the batch one rowid per iteration; failures are collected in
      -- SQL%BULK_EXCEPTIONS rather than aborting the batch.
      FORALL i IN r.FIRST..r.LAST SAVE EXCEPTIONS
        INSERT INTO test.test_tab_normal
        SELECT /*+ ROWID(A) */ *
        FROM test.test_tab A
        WHERE rowid = r(i);
    EXCEPTION
      WHEN bulk_errors THEN
        errors := SQL%BULK_EXCEPTIONS.COUNT;
        FOR err1 IN 1..errors LOOP
          -- ERROR_CODE is compared against positive Oracle error numbers here.
          error_code := SQL%BULK_EXCEPTIONS(err1).ERROR_CODE;
          IF error_code IN (1410, 8103, 1578) THEN
            myrowid := r(SQL%BULK_EXCEPTIONS(err1).ERROR_INDEX);
            bad_rows := bad_rows + 1;
            INSERT INTO test.bad_rows (row_id, oracle_error_code)
            VALUES (myrowid, TO_CHAR(error_code));
          ELSE
            -- Not a block-corruption error: report the real error number
            -- before re-raising, because the re-raised exception is ORA-24381.
            dbms_output.put_line('Unexpected error ORA-'||error_code||' for rowid '||
                                 r(SQL%BULK_EXCEPTIONS(err1).ERROR_INDEX));
            RAISE;
          END IF;
        END LOOP;
    END;
    COMMIT;
  END LOOP;
  COMMIT;
  CLOSE c1;
  dbms_output.put_line('Total Bad Rows: '||bad_rows);
END;
/

执行完成之后,会将正常的数据插入表test_tab_normal,坏块的rowid信息插入表bad_rows

---正常行数据
​
SQL> select /*+full(test_tab_normal) */ count(*) from test.test_tab_normal;
​
  COUNT(*)
----------
    998748
​
-- Rowids that could not be read (schema-qualified, like the other statements)
select row_id, oracle_error_code from test.bad_rows;
​
ROW_ID             ORACLE_ERROR_CODE
------------------ -----------------
AAASI3AAHAAAJrcAAA              1578
AAASI3AAHAAAJrcAAB              1578
AAASI3AAHAAAJrcAAC              1578
AAASI3AAHAAAJrcAAD              1578
AAASI3AAHAAAJrcAAE              1578
AAASI3AAHAAAJrcAAF              1578
AAASI3AAHAAAJrcAAG              1578
AAASI3AAHAAAJrcAAH              1578
AAASI3AAHAAAJrcAAI              1578
AAASI3AAHAAAJrcAAJ              1578
AAASI3AAHAAAJrcAAK              1578

通过dba_extents构造ROWID方式获取坏块表数据:

创建新表用于存放从坏块里面扫描到的正常数据

-- Empty table with the same columns as TEST.TEST_TAB; the always-false
-- predicate means no rows are copied.
CREATE TABLE test.test_tab_normal AS
SELECT *
FROM test.test_tab
WHERE 1 = 2;

创建错误表用于存放坏块信息

-- Log table: one row per rowid that could not be read, plus the error text.
-- The statement terminator is required for the statement to run in SQL*Plus.
CREATE TABLE test.bad_rows (
    row_id            ROWID,
    oracle_error_code VARCHAR2(1000)
);

执行以下匿名PL/SQL块:先按extent范围进行rowid扫描,一旦遇到报错,则捕获异常,转为对该extent进行逐行的rowid扫描

set serveroutput on
-- Salvage readable rows from a damaged table without using any index, by
-- building rowids from the extent map in DBA_EXTENTS.
--   1. For each extent, copy all rows with one rowid-range INSERT ... SELECT.
--   2. If that statement fails for any reason, probe the extent one
--      constructed rowid at a time: readable rows go to TEST.TEST_TAB_NORMAL,
--      rowids failing with ORA-01410 / ORA-08103 / ORA-01578 go to TEST.BAD_ROWS.
-- Because the rowids are constructed rather than read, TEST.BAD_ROWS will hold
-- many more entries than rows that really existed in the damaged blocks.
-- The extent cursor is restricted to segment type TABLE, so partitioned
-- tables are not handled by this script.
DECLARE
  TYPE extent_rec IS RECORD (data_object_id NUMBER, relative_fno NUMBER, block_id NUMBER, blocks NUMBER);
  t_extent      extent_rec;
  bad_rows      NUMBER := 0;
  error_code    NUMBER;
  v_block_id    NUMBER;
  v_rowid       ROWID;
  v_start_rowid ROWID;
  v_end_rowid   ROWID;
  v_sql1        VARCHAR2(1000);
  v_sql2        VARCHAR2(1000);
  -- Highest row slot probed in each block. The article assumes at most 660 rows
  -- per block. NOTE(review): confirm this bound for the block size and row
  -- length of the table being salvaged; raise it if blocks can hold more rows.
  c_max_row_no  CONSTANT PLS_INTEGER := 660;
  -- Owner and name of the damaged table.
  v_table       VARCHAR2(30) := 'TEST_TAB';
  v_owner       VARCHAR2(30) := 'TEST';
  -- One record per extent: first block to scan and number of blocks to scan.
  -- Extent 0 contains the segment header, so scanning starts at the block
  -- after HEADER_BLOCK and the block count is reduced accordingly.
  -- The type filters keep a same-named index or other segment out of the join.
  CURSOR c_extent IS
    SELECT b.data_object_id,
           a.relative_fno,
           c.header_block + 1 AS start_block_id,
           a.blocks - (c.header_block - a.block_id + 1) AS blocks
    FROM dba_extents a
    INNER JOIN dba_objects b
        ON b.object_name = a.segment_name
       AND b.owner = a.owner
    INNER JOIN dba_segments c
        ON c.segment_name = a.segment_name
       AND c.owner = a.owner
       AND c.segment_type = a.segment_type
    WHERE b.object_name = v_table
      AND b.owner = v_owner
      AND b.object_type = 'TABLE'
      AND a.segment_type = 'TABLE'
      AND a.extent_id = 0
    UNION ALL
    SELECT b.data_object_id,
           a.relative_fno,
           a.block_id AS start_block_id,
           a.blocks
    FROM dba_extents a
    INNER JOIN dba_objects b
        ON b.object_name = a.segment_name
       AND b.owner = a.owner
    WHERE b.object_name = v_table
      AND b.owner = v_owner
      AND b.object_type = 'TABLE'
      AND a.segment_type = 'TABLE'
      AND a.extent_id > 0;
BEGIN
  -- The statement texts do not change between iterations, so build them once.
  -- Rowids are passed as bind variables instead of being concatenated in as
  -- literals, so every probe reuses the same statement text.
  v_sql1 := 'insert into test.test_tab_normal select /*+ ROWID(A) */ * from '
            ||v_owner||'.'||v_table||' A where rowid between :start_rid and :end_rid';
  v_sql2 := 'insert into test.test_tab_normal select /*+ ROWID(A) */ * from '
            ||v_owner||'.'||v_table||' A where rowid = :rid';

  OPEN c_extent;
  LOOP
    FETCH c_extent INTO t_extent;
    EXIT WHEN c_extent%NOTFOUND;
    BEGIN
      -- Lowest and highest rowid that can belong to this extent.
      v_start_rowid := dbms_rowid.rowid_create(1, t_extent.data_object_id, t_extent.relative_fno,
                                               t_extent.block_id, 0);
      v_end_rowid   := dbms_rowid.rowid_create(1, t_extent.data_object_id, t_extent.relative_fno,
                                               t_extent.block_id + t_extent.blocks - 1, c_max_row_no);
      -- Fast path: copy the whole extent with one rowid-range scan.
      EXECUTE IMMEDIATE v_sql1 USING v_start_rowid, v_end_rowid;
      COMMIT;
    EXCEPTION
      WHEN OTHERS THEN
        -- Slow path: the range scan failed, so probe every possible rowid of
        -- the extent individually.
        FOR blk_off IN 0..t_extent.blocks - 1 LOOP
          v_block_id := t_extent.block_id + blk_off;
          FOR row_no IN 0..c_max_row_no LOOP
            BEGIN
              v_rowid := dbms_rowid.rowid_create(1, t_extent.data_object_id, t_extent.relative_fno,
                                                 v_block_id, row_no);
              EXECUTE IMMEDIATE v_sql2 USING v_rowid;
            EXCEPTION
              WHEN OTHERS THEN
                -- SQLCODE is negative, hence the negative error numbers.
                error_code := SQLCODE;
                IF error_code IN (-1410, -8103, -1578) THEN
                  bad_rows := bad_rows + 1;
                  INSERT INTO test.bad_rows (row_id, oracle_error_code)
                  VALUES (v_rowid, error_code||' block_id:'||v_block_id);
                  COMMIT;
                ELSE
                  RAISE;
                END IF;
            END;
          END LOOP;
          COMMIT;
        END LOOP;
        COMMIT;
    END;
  END LOOP;
  COMMIT;
  -- An empty extent list means the owner/name did not match a TABLE segment.
  IF c_extent%ROWCOUNT = 0 THEN
    dbms_output.put_line('No TABLE extents found for '||v_owner||'.'||v_table);
  END IF;
  CLOSE c_extent;
  dbms_output.put_line('Total Bad Rows: '||bad_rows);
END;
/

执行完成之后,会将正常的数据插入表test_tab_normal,坏块的rowid信息插入表bad_rows。bad_rows里面存放的错误行要比实际坏块的数据多得多,因为rowid是构造出来的,可能会插入很多并不存在的空行(无效)rowid记录

SQL> select count(*) from test.test_tab_normal;
​
  COUNT(*)
----------
    999877
 
 select row_id, oracle_error_code from test.bad_rows;
​
ROW_ID       ORACLE_ERROR_CODE
------------------ ----------------------------------------
AAAaQDAAEAAAAWSAKH -1578 block_id:1426
AAAaQDAAEAAAAWSAKI -1578 block_id:1426
AAAaQDAAEAAAAWSAKJ -1578 block_id:1426
AAAaQDAAEAAAAWSAKK -1578 block_id:1426
AAAaQDAAEAAAAWSAKL -1578 block_id:1426
AAAaQDAAEAAAAWSAKM -1578 block_id:1426
AAAaQDAAEAAAAWSAKN -1578 block_id:1426
AAAaQDAAEAAAAWSAKO -1578 block_id:1426
AAAaQDAAEAAAAWSAKP -1578 block_id:1426
AAAaQDAAEAAAAWSAKQ -1578 block_id:1426
AAAaQDAAEAAAAWSAKR -1578 block_id:1426

附:模拟坏块测试数据:

-- Create the test user, table and sequence.
CREATE USER test IDENTIFIED BY oracle;
GRANT dba, resource, connect, unlimited tablespace TO test;
-- Remove the table and sequence if they are left over from an earlier run.
DROP TABLE test.test_tab PURGE;
DROP SEQUENCE test.test_seq;
CREATE TABLE test.test_tab (
    id    NUMBER PRIMARY KEY,
    name1 VARCHAR2(200),
    name2 VARCHAR2(200),
    name3 VARCHAR2(200),
    name4 VARCHAR2(200)
);
CREATE SEQUENCE test.test_seq
    START WITH 1
    MAXVALUE 10000000
    CACHE 1000;
create or replace procedure  test.init_p1  is
   -- Loads 1,000,000 rows into TEST.TEST_TAB, committing every 10,000 inserts.
   -- IDs come from TEST.TEST_SEQ; NAME1 holds the loop counter plus one as text.
   v_count number(10):=0;   -- rows inserted since the last commit
begin
   for i in 1..1000000 loop
     -- Explicit column list and TO_CHAR: the insert no longer depends on the
     -- table's column order or on implicit NUMBER -> VARCHAR2 conversion.
     insert into test.test_tab (id, name1, name2, name3, name4)
     values (test.test_seq.nextval, to_char(i+1), 'aaaaaaaaa', 'bbbbbbbbbbbbbbbb', 'cccccccccccccc');
     v_count:=v_count+1;
     if v_count>=10000 THEN
       commit;
       v_count:=0;
     end if;
   end loop;
   -- Final commit after the loop.
   commit;
end;
/
​
-- Load the test rows and gather optimizer statistics on the table.
exec test.init_p1;
exec dbms_stats.gather_table_stats('TEST','TEST_TAB');
-- Simulate block corruption: start by flushing the buffer cache.
alter system flush buffer_cache;
-- Choose 10 data blocks of the table; they are then wiped with RMAN
-- "blockrecover ... clear" to produce the corrupt blocks.
set linesize 200 pagesize 1400
SELECT
    dbms_rowid.rowid_block_number(rowid) AS blkid,
    dbms_rowid.rowid_relative_fno(rowid) AS rfile
FROM test.test_tab SAMPLE (0.005)
WHERE rownum <= 10;
​
     BLKID      RFILE
---------- ----------
     39644          7
    189897          7
    189950          7
    190287          7
    190395          7
    190744          7
    191726          7
    191583          7
    191117          7
    191775          7
  
blockrecover datafile 7 block 39644,189897,189950,190287,190395,190744,191726,191583,191117,191775 clear;
RMAN> blockrecover datafile 7 block 39644,189897,189950,190287,190395,190744,191726,191583,191117,191775 clear;
​
Starting recover at 2023/02/06 11:12:21
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: SID=274 device type=DISK
Finished recover at 2023/02/06 11:12:22
RMAN> 

 

 

;