前言:
当表出现坏块时,我们可以通过备份或者容灾环境对数据进行修复。但如果没有备份,或者容灾环境无效,导致无法对数据进行修复,这时候就需要从出现坏块的表里面获取正常数据,最大限度地避免数据损失。获取数据的常见方法有两种:一是通过DBMS_REPAIR对表坏块进行标记,从而跳过坏块数据获取正常数据(具体操作可以参见之前的文章《Oracle-通过DBMS_REPAIR处理表坏块》);二是通过ROWID方式从坏块表里面获取数据。本文接下来主要讲述如何通过ROWID方式从坏块表里面获取数据。
通过ROWID方式获取坏块表数据的两种途径:
1 从表所在的索引里面获取rowid,如果坏块表里面的索引没有出现坏块,那么我们可以从索引里面获取数据的rowid,通过rowid去扫描坏块表获取正常的数据。
2 如果表没有索引或者索引损坏,那么我们可以通过dba_extents里面的信息去构造表的rowid,再通过rowid去扫描坏块表获取正常的数据。
通过索引获取ROWID方式获取坏块表数据:
对表进行扫描,出现了ORA-01578坏块报错
SQL> select /*+full(test_tab) */ count(*) from test.test_tab;
select /*+full(test_tab) */ count(*) from test.test_tab
*
ERROR at line 1:
ORA-01578: ORACLE data block corrupted (file # 7, block # 39644)
ORA-01110: data file 7: '/u01/app/oracle/oradata/ORCL/users01.dbf'
通过索引进行扫描,没有出现报错,说明索引正常
-- The index_ffs hint requests an index fast full scan of SYS_C0011103
-- (presumably the system-named primary-key index on id - confirm), so the
-- rowids are read from the index rather than from the table blocks.
select /*+ index_ffs(tab1 SYS_C0011103) parallel(tab1) */
       rowid
from   test.test_tab tab1
where  id is not null
order  by rowid;
创建新表用于存放从坏块里面扫描到的正常数据
-- Empty table with the same column layout as test.test_tab; the always-false
-- predicate copies the structure without copying any rows.
create table test.test_tab_normal
as
select *
from   test.test_tab
where  1 = 2;
创建错误表用于存放坏块信息
-- Error log table: one row per rowid that could not be read, together with
-- the Oracle error code reported for it.
create table test.bad_rows (
    row_id            rowid,
    oracle_error_code varchar2(1000)
);
使用以下PL/SQL代码,以从索引获取rowid的方式扫描坏块表(脚本来自mos:1527738.1)
set serveroutput on
DECLARE
  -- Salvage readable rows from test.test_tab into test.test_tab_normal.
  -- Rowids are taken from the index, then each row is fetched by rowid.
  -- Rowids that fail with a corruption-type error are logged in test.bad_rows.

  -- One batch of rowids fetched from the index.
  TYPE RowIDTab IS TABLE OF ROWID INDEX BY PLS_INTEGER;

  -- Read rowids via an index fast full scan so the table blocks are not
  -- visited while collecting them.
  CURSOR c1 IS
    select /*+ index_ffs(tab1 SYS_C0011103) parallel(tab1) */ rowid
    from test.test_tab tab1
    where id is NOT NULL
    order by rowid;

  -- ORA-24381 is what FORALL ... SAVE EXCEPTIONS raises once the statement has
  -- finished and at least one iteration failed. Handling only this error lets
  -- any unrelated failure propagate instead of being silently swallowed.
  bulk_errors EXCEPTION;
  PRAGMA EXCEPTION_INIT(bulk_errors, -24381);

  batch_size    CONSTANT PLS_INTEGER := 20000;  -- rowids processed per FORALL
  r             RowIDTab;
  bad_row_count NUMBER := 0;
  error_code    NUMBER;
  myrowid       ROWID;
BEGIN
  OPEN c1;
  LOOP
    FETCH c1 BULK COLLECT INTO r LIMIT batch_size;
    EXIT WHEN r.COUNT = 0;

    BEGIN
      -- Fetch each row by rowid and copy it; failures are saved per iteration
      -- so one unreadable row does not abort the rest of the batch.
      -- SELECT * is intentional: the target is a structural copy of the source.
      FORALL i IN r.FIRST .. r.LAST SAVE EXCEPTIONS
        insert into test.test_tab_normal
        select /*+ ROWID(A) */ *
        from test.test_tab A
        where rowid = r(i);
    EXCEPTION
      WHEN bulk_errors THEN
        FOR err1 IN 1 .. SQL%BULK_EXCEPTIONS.COUNT LOOP
          -- ERROR_CODE is reported as a positive number (1578 for ORA-01578).
          error_code := SQL%BULK_EXCEPTIONS(err1).ERROR_CODE;
          myrowid    := r(SQL%BULK_EXCEPTIONS(err1).ERROR_INDEX);

          IF error_code IN (1410, 8103, 1578) THEN
            -- Corruption-type error: log the rowid and keep going.
            bad_row_count := bad_row_count + 1;
            insert into test.bad_rows (row_id, oracle_error_code)
            values (myrowid, TO_CHAR(error_code));
          ELSE
            -- Anything else is unexpected: stop and report the real error
            -- code rather than the generic ORA-24381.
            raise_application_error(
              -20001,
              'Unexpected error ORA-' || LPAD(TO_CHAR(error_code), 5, '0')
                || ' while copying rowid ' || ROWIDTOCHAR(myrowid),
              TRUE);
          END IF;
        END LOOP;
    END;

    -- Commit once per batch so completed work survives a later failure.
    COMMIT;
  END LOOP;
  CLOSE c1;

  dbms_output.put_line('Total Bad Rows: ' || bad_row_count);
END;
/
执行完成之后,会将正常的数据插入表test_tab_normal,坏块的rowid信息插入表bad_rows
---正常行数据
SQL> select /*+full(test_tab_normal) */ count(*) from test.test_tab_normal;
COUNT(*)
----------
998748
---错误行数据
select * from bad_rows
ROW_ID ORACLE_ERROR_CODE
------------------ -----------------
AAASI3AAHAAAJrcAAA 1578
AAASI3AAHAAAJrcAAB 1578
AAASI3AAHAAAJrcAAC 1578
AAASI3AAHAAAJrcAAD 1578
AAASI3AAHAAAJrcAAE 1578
AAASI3AAHAAAJrcAAF 1578
AAASI3AAHAAAJrcAAG 1578
AAASI3AAHAAAJrcAAH 1578
AAASI3AAHAAAJrcAAI 1578
AAASI3AAHAAAJrcAAJ 1578
AAASI3AAHAAAJrcAAK 1578
通过dba_extents构造ROWID方式获取坏块表数据:
创建新表用于存放从坏块里面扫描到的正常数据
-- Empty table with the same column layout as test.test_tab; the always-false
-- predicate copies the structure without copying any rows.
create table test.test_tab_normal
as
select *
from   test.test_tab
where  1 = 2;
创建错误表用于存放坏块信息
-- Error log table: one row per rowid that could not be read, together with
-- the Oracle error code (and block id) reported for it.
create table test.bad_rows (
    row_id            rowid,
    oracle_error_code varchar2(1000)
);
执行以下PL/SQL匿名块:先按extent范围进行rowid扫描,一旦遇到报错,则捕获异常,转为对该extent进行逐行的rowid扫描
set serveroutput on
DECLARE
  -- Salvage readable rows from v_owner.v_table into test.test_tab_normal
  -- without using an index. Rowids are constructed from the extent map in
  -- dba_extents. Each extent is first copied with a single rowid-range insert.
  -- If that fails with a corruption-type error, the extent is re-scanned one
  -- constructed rowid at a time and failing rowids are logged in test.bad_rows.

  -- Addressing information for one extent, as fetched from c_extent.
  TYPE extent_rec IS RECORD (
    data_object_id NUMBER,
    relative_fno   NUMBER,
    block_id       NUMBER,  -- first block to scan in the extent
    blocks         NUMBER   -- number of blocks to scan starting at block_id
  );
  t_extent extent_rec;

  -- Highest row slot number probed in each block (slots 0..660).
  -- NOTE(review): the original author states 660 is the maximum number of
  -- rows a block can hold - confirm for the block size in use.
  c_max_row_slot CONSTANT PLS_INTEGER := 660;

  bad_row_count NUMBER := 0;
  error_code    NUMBER;
  v_block_id    NUMBER;
  v_rowid       ROWID;
  v_start_rowid ROWID;
  v_end_rowid   ROWID;
  v_range_sql   VARCHAR2(1000);
  v_row_sql     VARCHAR2(1000);

  -- Owner and name of the table that contains the corrupted blocks.
  v_table VARCHAR2(128) := 'TEST_TAB';
  v_owner VARCHAR2(128) := 'TEST';

  -- Start block and block count for every extent of the segment.
  -- Extent 0 contains the segment header, so scanning starts at the block
  -- after header_block and the block count is reduced accordingly.
  -- NOTE(review): the joins match on owner and name only, not on object or
  -- segment type - confirm no other object shares the table's name.
  CURSOR c_extent IS
    SELECT b.data_object_id,
           a.relative_fno,
           c.header_block + 1 AS start_block_id,
           a.blocks - (c.header_block - a.block_id + 1) AS scan_blocks
    FROM dba_extents a
    INNER JOIN dba_objects b
      ON b.object_name = a.segment_name
     AND b.owner = a.owner
    INNER JOIN dba_segments c
      ON c.segment_name = a.segment_name
     AND c.owner = a.owner
    WHERE b.object_name = v_table
      AND b.owner = v_owner
      AND a.extent_id = 0
    UNION ALL
    SELECT b.data_object_id,
           a.relative_fno,
           a.block_id AS start_block_id,
           a.blocks AS scan_blocks
    FROM dba_extents a
    INNER JOIN dba_objects b
      ON b.object_name = a.segment_name
     AND b.owner = a.owner
    WHERE b.object_name = v_table
      AND b.owner = v_owner
      AND a.extent_id > 0;
BEGIN
  -- Build both statements once. Rowids are passed as bind variables so the
  -- same statement is reused for every probe instead of being parsed anew
  -- for each literal. SELECT * is intentional: the target table is a
  -- structural copy of the source.
  v_range_sql := 'insert into test.test_tab_normal select /*+ ROWID(A) */ * from '
                 || v_owner || '.' || v_table
                 || ' A where rowid between :start_rid and :end_rid';
  v_row_sql   := 'insert into test.test_tab_normal select /*+ ROWID(A) */ * from '
                 || v_owner || '.' || v_table
                 || ' A where rowid = :rid';

  OPEN c_extent;
  LOOP
    FETCH c_extent INTO t_extent;
    EXIT WHEN c_extent%NOTFOUND;

    BEGIN
      -- Smallest and largest rowid that can occur in this extent.
      v_start_rowid := dbms_rowid.rowid_create(
                         1, t_extent.data_object_id, t_extent.relative_fno,
                         t_extent.block_id, 0);
      v_end_rowid   := dbms_rowid.rowid_create(
                         1, t_extent.data_object_id, t_extent.relative_fno,
                         t_extent.block_id + t_extent.blocks - 1, c_max_row_slot);

      -- Fast path: copy the whole extent with one rowid-range scan.
      EXECUTE IMMEDIATE v_range_sql USING v_start_rowid, v_end_rowid;
      COMMIT;
    EXCEPTION
      WHEN OTHERS THEN
        -- Fall back to the slow path only for corruption-type errors; any
        -- other failure is re-raised immediately.
        IF SQLCODE NOT IN (-1410, -8103, -1578) THEN
          RAISE;
        END IF;

        -- Slow path: probe every possible row slot of every block in the
        -- extent. The failed range insert inserted nothing, so no row is
        -- copied twice.
        FOR blk IN 0 .. t_extent.blocks - 1 LOOP
          v_block_id := t_extent.block_id + blk;
          FOR slot IN 0 .. c_max_row_slot LOOP
            BEGIN
              v_rowid := dbms_rowid.rowid_create(
                           1, t_extent.data_object_id, t_extent.relative_fno,
                           v_block_id, slot);
              EXECUTE IMMEDIATE v_row_sql USING v_rowid;
            EXCEPTION
              WHEN OTHERS THEN
                error_code := SQLCODE;
                IF error_code IN (-1410, -8103, -1578) THEN
                  -- The rowids are constructed, so every probed slot of a
                  -- corrupted block is logged, including slots that never
                  -- held a row.
                  bad_row_count := bad_row_count + 1;
                  INSERT INTO test.bad_rows (row_id, oracle_error_code)
                  VALUES (v_rowid,
                          TO_CHAR(error_code) || ' block_id:' || TO_CHAR(v_block_id));
                ELSE
                  RAISE;
                END IF;
            END;
          END LOOP;
          -- Commit once per block: salvaged rows and logged bad rowids.
          COMMIT;
        END LOOP;
    END;
  END LOOP;
  CLOSE c_extent;

  dbms_output.put_line('Total Bad Rows: ' || bad_row_count);
END;
/
执行完成之后,正常的数据会被插入表test_tab_normal,坏块的rowid信息会被插入表bad_rows。bad_rows里面存放的错误行要比实际坏块中的数据行多得多,因为rowid是构造出来的,坏块中并不存在数据行的行号也会被记录为无效记录
SQL> select count(*) from test.test_tab_normal;
COUNT(*)
----------
999877
select * from test.bad_rows
ROW_ID ORACLE_ERROR_CODE
------------------ ----------------------------------------
AAAaQDAAEAAAAWSAKH -1578 block_id:1426
AAAaQDAAEAAAAWSAKI -1578 block_id:1426
AAAaQDAAEAAAAWSAKJ -1578 block_id:1426
AAAaQDAAEAAAAWSAKK -1578 block_id:1426
AAAaQDAAEAAAAWSAKL -1578 block_id:1426
AAAaQDAAEAAAAWSAKM -1578 block_id:1426
AAAaQDAAEAAAAWSAKN -1578 block_id:1426
AAAaQDAAEAAAAWSAKO -1578 block_id:1426
AAAaQDAAEAAAAWSAKP -1578 block_id:1426
AAAaQDAAEAAAAWSAKQ -1578 block_id:1426
AAAaQDAAEAAAAWSAKR -1578 block_id:1426
附:模拟坏块测试数据:
-- Create the test data: user, table and sequence
create user test identified by oracle;
grant dba, resource, connect, unlimited tablespace to test;
-- Remove objects left over from a previous run before recreating them
drop table test.test_tab purge;
drop sequence test.test_seq;
create table test.test_tab (
    id    number primary key,
    name1 varchar2(200),
    name2 varchar2(200),
    name3 varchar2(200),
    name4 varchar2(200)
);
create sequence test.test_seq
    start with 1
    maxvalue 10000000
    cache 1000;
create or replace procedure test.init_p1 is
  -- Populates test.test_tab with one million rows whose ids come from
  -- test.test_seq, committing every 10000 inserts and once more at the end.
  c_total_rows   constant pls_integer := 1000000;
  c_commit_every constant pls_integer := 10000;
  v_count        number(10) := 0;  -- rows inserted since the last commit
begin
  for i in 1..c_total_rows loop
    -- Explicit column list and TO_CHAR keep the insert independent of column
    -- order and of implicit number-to-varchar2 conversion.
    insert into test.test_tab (id, name1, name2, name3, name4)
    values (test.test_seq.nextval, to_char(i + 1), 'aaaaaaaaa', 'bbbbbbbbbbbbbbbb', 'cccccccccccccc');
    v_count := v_count + 1;
    if v_count >= c_commit_every then
      commit;
      v_count := 0;
    end if;
  end loop;
  commit;
end;
/
-- Load the test rows
begin
  test.init_p1;
end;
/
-- Gather optimizer statistics for TEST.TEST_TAB
begin
  dbms_stats.gather_table_stats('TEST', 'TEST_TAB');
end;
/
-- Simulate corrupted block data: flush the buffer cache first
-- (presumably so later reads come from the data file - confirm)
alter system flush buffer_cache;
---用blockrecover clear清理10个数据块,模拟坏块生成
set linesize 200
set pagesize 1400
-- Take up to 10 sampled rows and report the block number and relative file
-- number encoded in each rowid; these are the blocks to be corrupted.
select
    dbms_rowid.rowid_block_number(rowid) as blkid,
    dbms_rowid.rowid_relative_fno(rowid) as rfile
from test.test_tab sample (0.005)
where rownum <= 10;
BLKID RFILE
---------- ----------
39644 7
189897 7
189950 7
190287 7
190395 7
190744 7
191726 7
191583 7
191117 7
191775 7
blockrecover datafile 7 block 39644,189897,189950,190287,190395,190744,191726,191583,191117,191775 clear;
RMAN> blockrecover datafile 7 block 39644,189897,189950,190287,190395,190744,191726,191583,191117,191775 clear;
Starting recover at 2023/02/06 11:12:21
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: SID=274 device type=DISK
Finished recover at 2023/02/06 11:12:22
RMAN>