Compare and sync tables: test data

This page explains how to create the test data used in my posts on comparing and syncing tables.

My two tables are called T_SOURCE and T_TARGET. They each have a primary key with two columns, KEY_NUM and KEY_DATE, and two other columns called VAL_TS (a timestamp) and VAL_STRING (a VARCHAR2).

  • For some tests, I make VAL_STRING an “invisible” column: this is a 12c feature that hides the column when you SELECT * FROM…
  • For other tests, I add a “virtual column” called VAL_STRING_TRUNC: this is a 11gR2 feature that creates a column with a generated value. VAL_STRING_TRUNC is simply the first 10 characters of VAL_STRING.

I start by defining 4 substitution variables:

  • num_rows = Number of rows in each table
  • pct = Percent of rows to change (equally divided among inserts, deletes and updates)
  • val_string_length = Length of VARCHAR2 value, in order to make the row wider if necessary
  • num_mods = Number of modifications of each type (insert, delete or update),
    calculated as 1/3 of num_rows times pct
SET timing OFF
set verify off
define num_rows = 300
define pct = 10
define val_string_length = 50
define num_mods = ceil((&num_rows/3)*(&pct/100))
/
select 'val_string_length' var, &val_string_length val from dual
union all
select 'num_rows', &num_rows from dual
union all
select 'pct', &pct from dual
union all
select 'num_mods', &num_mods from dual;
VAR VAL
val_string_length 50
num_rows 300
pct 10
num_mods 10


Each table will have 300 rows and there will be 30 differences requiring 10 inserts, 10 updates and 10 deletes to synchronize. Note that there will be 280 identical rows, not 270, because the “inserts” are in one table and the “deletes” in another.

[Update 2014-12-26: In order to make more lifelike test conditions, I create and populate staging tables, then sort the data randomly when populating the “real” tables.]

-- Drop real tables, create and populate staging tables

DROP TABLE T_TARGET cascade constraints purge;

DROP TABLE T_SOURCE cascade constraints purge;

create table stage_target(
  key_num number,
  key_date date,
  val_ts timestamp,
  val_string varchar2(&val_string_length));

create table t_target(
  key_num number,
  key_date date,
  val_ts timestamp,
  val_string varchar2(&val_string_length),
  constraint pk_target primary key(key_num, key_date));

create table stage_source(
  key_num number,
  key_date date,
  val_ts timestamp,
  val_string varchar2(&val_string_length));

create table t_source(
  key_num number,
  key_date date,
  val_ts timestamp,
  val_string varchar2(&val_string_length),
  constraint pk_source primary key(key_num, key_date));

-- Insert lines that are the same in both tables

insert /*+ append */ into stage_source
select level+&num_mods*3,
  current_date,
  localtimestamp,
  rpad('ZERO - same in source and target ',&val_string_length, '*')
from dual
connect by level <= &num_rows-&num_mods*2;

commit;

insert /*+ append */ into stage_target
select * from stage_source;

commit;

-- Insert lines that are in t_source but not in t_target

insert /*+ append */ into stage_source
select level,
  current_date,
  localtimestamp,
  rpad('INSERT - in source, not in target ',&val_string_length, '*')
from dual
connect by level <= &num_mods;
commit;

insert /*+ append */ into stage_target
select level+&num_mods,
  current_date,
  localtimestamp,
  rpad('DELETE - not in source, in target ',&val_string_length, '*')
from dual
connect by level <= &num_mods;
commit;

insert /*+ append */ into stage_source
  select level+&num_mods*2,
  current_date,
  localtimestamp,
  rpad('UPDATE - put this in target ',&val_string_length, '*')
from dual
connect by level <= &num_mods;
commit;

insert /*+ append */ into stage_target
  select level+&num_mods*2,
  current_date,
  localtimestamp,
  rpad('UPDATE - update this from source ',&val_string_length, '*')
from dual
connect by level <= &num_mods;
commit;

insert /*+ append */ into t_target
select * from stage_target
order by dbms_random.random;
drop table stage_target purge;

insert /*+ append */ into t_source
select * from stage_source
order by dbms_random.random;
drop table stage_source purge;

exec DBMS_STATS.GATHER_TABLE_STATS(
  ownname =>user,tabname =>'T_SOURCE',cascade =>true
);
exec DBMS_STATS.GATHER_TABLE_STATS(
  ownname =>user,tabname =>'T_TARGET',cascade =>true
);

select 't_source' table_name, count(*) cnt, val_string
from t_source
group by 't_source', val_string
union all
select 't_target', count(*), val_string
from t_target
group by 't_target', val_string
order by 3,1;
TABLE_NAME CNT VAL_STRING
t_target 10 DELETE – not in source, in target ****************
t_source 10 INSERT – in source, not in target ****************
t_source 10 UPDATE – put this in target **********************
t_target 10 UPDATE – update this from source *****************
t_source 280 ZERO – same in source and target *****************
t_target 280 ZERO – same in source and target *****************
-- To test invisible columns
alter table t_source modify val_string invisible;
alter table t_target modify val_string invisible;

-- To test virtual columns
alter table t_source add val_string_trunc generated always as (substr(val_string,1,10));
alter table t_target add val_string_trunc generated always as (substr(val_string,1,10));
Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s