範 例 六 : 應用矩陣計算共有環帶
程式檔名稱 : linEAR6.htm
/* Informat ANIMAL: gel scores typed as '+' / '-' are read as the numbers 1 / 0. */
proc format;
  invalue animal '+'=1 '-'=0;
run;

/* One record per animal: id in columns 1-2, population code in columns 3-4,
   followed by the ten band scores (band1-band10) starting at column 5,
   each read through the ANIMAL informat. */
DATA GEL;
  input id $ 1-2 popul $ 3-4
        @5 (band1-band10) (animal.);
  /* MEAN() is missing only when every band is missing, so this removes
     records in which no band at all could be scored. */
  if mean(of band1-band10) = . then delete;
cards;
A Y +++++++++-
B Y +++++++++-
C X ----++---+
D Y ++-+++--+-
E X ++--++---+
F Y +++++++++-
G Y ++-+++--+-
H Y +++++++++-
I Y ++++++-++-
J X ----++---+
K Y +++++++++-
L X --+++----+
;
run;
/* Order the animals by population code, then by animal id. The matrix code
   that follows relies on all animals of one population being contiguous. */
proc sort data=gel out=gel; by popul id;
run;

proc print data=gel noobs;
run;

/* Count the animals in each population. Without NWAY the output data set NN
   holds one overall row (_TYPE_=0) followed by one row per POPUL level
   (_TYPE_=1); variable NN is the number of non-missing BAND1 values. */
proc summary data=gel;
  class popul;
  var band1;
  output out=nn n=nn;
run;

/* Reduce GEL to the 1/0 band columns so it can be read into matrix A.
   The character columns (and any records that should not be analysed)
   are removed here. */
DATA GEL;
  SET GEL;
  *if xxx then delete;
  DROP POPUL ID;
run;
PROC IML;
/* A: one row per animal (in sorted order), one column per band, entries 1/0. */
USE GEL;
READ ALL INTO A;
CLOSE GEL;

/* nn: the numeric columns of the PROC SUMMARY output (POPUL is character and
   is therefore not read). For this example the columns are _TYPE_, _FREQ_
   and the count NN; row 1 is the overall total and rows 2 and 3 are the two
   population levels in sorted order.
   NOTE(review): the fixed indices below assume exactly two populations. */
use nn;
read all into nn;
close nn;

na=nn[2,3];   /* animals in the first population  */
nb=nn[3,3];   /* animals in the second population */
print nn na nb;
/* Shared-band matrix between animals:
   C[i,j] = number of bands present in both animal i and animal j,
   C[i,i] = number of bands present in animal i. */
C = A*A`;
PRINT C[format=2.0];
N = NCOL(C);                 /* number of animals */

SUMD = trace(C);             /* sum of the diagonal of C                */
SUMC = sum(C) - SUMD;        /* sum of all off-diagonal elements of C   */

/* BS[i,j] (i < j) = 2*C[i,j] / (C[i,i] + C[j,j]): band-sharing rate of the
   pair (i, j). Only the strict upper triangle is filled; the rest stays 0. */
BS = J(N, N, 0);
do i = 1 to N;
  do j = (i+1) to N;
    BS[i,j] = (2*C[i,j]) / (C[i,i] + C[j,j]);
  end;
end;

/* BS_SUM  : pooled band-sharing rate computed from the totals of C.
   BS_Lynch: mean of the pairwise rates over all N*(N-1)/2 pairs
             (Lynch, 1990). */
BS_SUM   = SUMC / (SUMD*(N-1));
BS_Lynch = 2*sum(BS) / (N*(N-1));

PRINT BS[format=5.3];
PRINT BS_SUM BS_Lynch;
/* Band sharing within and between the two populations.
   rowa / rowb are 0/1 selectors for the first na and the following nb rows
   of BS. Because BS is strictly upper triangular and the animals are sorted
   by population, rowa*BS*rowb` picks up every between-population pair once. */
rowa=J(1,na,1)||J(1,nb,0);
rowb=J(1,na,0)||J(1,nb,1);

BSx= 2*(rowa * BS * rowa`) /(na*(na-1));   /* mean band-sharing rate within the first population  */
BSy= 2*(rowb * BS * rowb`) /(nb*(nb-1));   /* mean band-sharing rate within the second population */
BSxy= (rowa * BS * rowb`) /(na*nb);        /* mean band-sharing rate between the two populations  */

B_Lynch= 1 + BSxy - 0.5*(BSx+ BSy);        /* between-population similarity (Lynch, 1990) */
Dxy = -log (BSxy /SQRT (BSx * BSy) );      /* genetic distance (Lynch, 1991)              */
print BSx BSy BSxy na nb B_Lynch Dxy;
/* Marker-by-marker 2x2 counts. Note the difference from C = A*A`:
   C is indexed by animal, D is indexed by marker (band).
     D[i,j] = animals with band i present and band j present
     E[i,j] = animals with band i absent  and band j absent
     F[i,j] = animals with band i present and band j absent   */
D = A`*A;
PRINT D[format=2.0];
M = NCOL(D);                 /* number of markers */
B = J(M, N, 1) - A`;         /* absence indicator, markers x animals */
E = B*B`;
F = A`*B`;

/* Add one blank control sample (every band absent) so that a marker without
   polymorphism (all '+', Band 5 in this example) still gets a non-zero
   denominator. Comment this statement out when it is not wanted. */
E = E + J(M, M, 1);

/* R[i,j] = (D*E - F[i,j]*F[j,i]) / sqrt of the product of the four marginal
   totals of the 2x2 table. R[i,j] stays 0 when the denominator is 0. */
R = J(M, M, 0);
do i = 1 to M;
  do j = 1 to M;
    phiNum = D[i,j]*E[i,j] - F[i,j]*F[j,i];
    phiDen = SQRT( (D[i,j]+F[i,j])*(F[j,i]+E[i,j])
                  *(D[i,j]+F[j,i])*(F[i,j]+E[i,j]) );
    /* ^= is the IML "not equal" operator; <> would be element maximum. */
    if phiDen ^= 0 then R[i,j] = phiNum/phiDen;

    /* t statistic for R[i,j] on N-2 degrees of freedom, N being the number
       of animals. The argument of SQRT is clamped at 0 to absorb rounding,
       and |R| = 1 (zero standard error) is given the sentinel value 999. */
    seR = SQRT( max(1.0 - R[i,j]*R[i,j], 0) / (N-2) );
    if seR ^= 0 then T = R[i,j]/seR;
    else T = 999;

    /* Optional significance filter: enable to zero out correlations that
       are not significant.
       if (PROBT(abs(T), N-2) < 0.975) then R[i,j] = 0.000;  */
  end;
end;
PRINT R[format=5.3];
PRINT D[format=4.0] E[format=4.0] F[format=4.0];

/* Turn the correlation matrix into a distance matrix: 1 - |R|
   (0 => 1, 0.1 => 0.9, -0.9 => 0.1, 1 => 0). */
R = J(M, M, 1) - abs(R);

/* Write the distance matrix to data set CORR (columns COL1, COL2, ...). */
create corr from R;
append from R;
close corr;
/* H: transpose of A, i.e. one row per band and one column per animal.
   Written to data set BAND so that each band becomes one record. */
H=A`;
create BAND from H;
append from H;
close BAND;
quit;
/* Single-linkage clustering from the 1 - |R| distance matrix in CORR.
   CORR has one row per band, so the observations being clustered
   (OB1, OB2, ...) are bands, not animals. */
PROC CLUSTER DATA=corr(type=distance) METHOD=SINGLE OUTTREE=tree_corr;
run;
PROC TREE DATA=tree_corr HORIZONTAL SPACES=2;
run;

/* Average-linkage clustering of the same bands, this time from the raw 1/0
   profiles in BAND (one row per band, one column per animal). */
PROC CLUSTER DATA=BAND METHOD=AVERAGE OUTTREE=tree_band;
run;
PROC TREE DATA=tree_band HORIZONTAL SPACES=2;
RUN;
輸出 6-1 排序後的資料、族群頭數(NN NA NB)輸出與個體與個體間相同條帶的出現
次數矩陣 C(如個體 1 與個體 2 間有 3 條相同電泳條帶)
ID POPUL BAND1 BAND2 BAND3 BAND4 BAND5 BAND6 BAND7 BAND8 BAND9 BAND10
C X 0 0 0 0 1 1 0 0 0 1
E X 1 1 0 0 1 1 0 0 0 1
J X 0 0 0 0 1 1 0 0 0 1
L X 0 0 1 1 1 0 0 0 0 1
A Y 1 1 1 1 1 1 1 1 1 0
B Y 1 1 1 1 1 1 1 1 1 0
D Y 1 1 0 1 1 1 0 0 1 0
F Y 1 1 1 1 1 1 1 1 1 0
G Y 1 1 0 1 1 1 0 0 1 0
H Y 1 1 1 1 1 1 1 1 1 0
I Y 1 1 1 1 1 1 0 1 1 0
K Y 1 1 1 1 1 1 1 1 1 0
NN NA NB
0 12 12 4 8
1 4 4
1 8 8
C
3 3 3 2 2 2 2 2 2 2 2 2
3 5 3 2 4 4 4 4 4 4 4 4
3 3 3 2 2 2 2 2 2 2 2 2
2 2 2 4 3 3 2 3 2 3 3 3
2 4 2 3 9 9 6 9 6 9 8 9
2 4 2 3 9 9 6 9 6 9 8 9
2 4 2 2 6 6 6 6 6 6 6 6
2 4 2 3 9 9 6 9 6 9 8 9
2 4 2 2 6 6 6 6 6 6 6 6
2 4 2 3 9 9 6 9 6 9 8 9
2 4 2 3 8 8 6 8 6 8 8 8
2 4 2 3 9 9 6 9 6 9 8 9
BS_SUM
0.7022727
BS_LYNCH
0.6648579
BS
0.000 0.750 1.000 0.571 0.333 0.333 0.444 0.333 0.444 0.333 0.364 0.333
0.000 0.000 0.750 0.444 0.571 0.571 0.727 0.571 0.727 0.571 0.615 0.571
0.000 0.000 0.000 0.571 0.333 0.333 0.444 0.333 0.444 0.333 0.364 0.333
0.000 0.000 0.000 0.000 0.462 0.462 0.400 0.462 0.400 0.462 0.500 0.462
0.000 0.000 0.000 0.000 0.000 1.000 0.800 1.000 0.800 1.000 0.941 1.000
0.000 0.000 0.000 0.000 0.000 0.000 0.800 1.000 0.800 1.000 0.941 1.000
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.800 1.000 0.800 0.857 0.800
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.800 1.000 0.941 1.000
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.800 0.857 0.800
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.941 1.000
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.941
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
BSX BSY BSXY B_LYNCH DXY
0.6812169 0.9078631 0.4491609 0.6546209 0.560106
D
9 9 6 8 9 9 5 6 8 1
9 9 6 8 9 9 5 6 8 1
6 6 7 7 7 6 5 6 6 1
8 8 7 9 9 8 5 6 8 1
9 9 7 9 12 11 5 6 8 4
9 9 6 8 11 11 5 6 8 3
5 5 5 5 5 5 5 5 5 0
6 6 6 6 6 6 5 6 6 0
8 8 6 8 8 8 5 6 8 0
1 1 1 1 4 3 0 0 0 4
R
1.000 1.000 0.386 0.639 0.433 0.640 0.527 0.617 0.843 -.639
1.000 1.000 0.386 0.639 0.433 0.640 0.527 0.617 0.843 -.639
0.386 0.386 1.000 0.720 0.312 0.033 0.732 0.857 0.537 -.386
0.639 0.639 0.720 1.000 0.433 0.178 0.527 0.617 0.843 -.639
0.433 0.433 0.312 0.433 1.000 0.677 0.228 0.267 0.365 0.192
0.640 0.640 0.033 0.178 0.677 1.000 0.337 0.395 0.539 -.178
0.527 0.527 0.732 0.527 0.228 0.337 1.000 0.854 0.625 -.527
0.617 0.617 0.857 0.617 0.267 0.395 0.854 1.000 0.732 -.617
0.843 0.843 0.537 0.843 0.365 0.539 0.625 0.732 1.000 -.843
-.639 -.639 -.386 -.639 0.192 -.178 -.527 -.617 -.843 1.000
Single Linkage Cluster Analysis
Mean Distance Between Observations = 0.452485
Number Frequency Normalized
of of New Minimum
Clusters --Clusters Joined-- Cluster Distance Tie
9 OB1 OB2 2 0.000000
8 OB3 OB8 2 0.315717
7 CL8 OB7 3 0.322856
6 CL9 OB9 3 0.346367 T
5 CL6 OB4 4 0.346367 T
4 CL5 OB10 5 0.346367
3 CL4 CL7 8 0.592451
2 OB5 OB6 2 0.713829
1 CL3 CL2 10 0.796486
Single Linkage Cluster Analysis
Minimum Distance Between Clusters
0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1 0
+------+------+------+------+------+------+------+------+
N OB1 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
a XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
m XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
e OB2 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
f OB9 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX........................
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
O XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
b OB4 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX........................
s XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
e XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
r OB10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX........................
v XXXXXXXXXXXXXXXX
a XXXXXXXXXXXXXXXX
t OB3 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................
i XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
n OB8 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX......................
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
r OB7 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.......................
X
C X
l OB5 XXXXXXX..................................................
u XXXXXXX
s XXXXXXX
t OB6 XXXXXXX..................................................
Average Linkage Cluster Analysis
Eigenvalues of the Covariance Matrix
Eigenvalue Difference Proportion Cumulative
1 0.977670 0.245657 0.439951 0.43995
2 0.732012 0.490823 0.329406 0.76936
3 0.241189 0.089822 0.108535 0.87789
4 0.151367 0.079455 0.068115 0.94601
5 0.071913 0.023842 0.032361 0.97837
6 0.048071 0.048071 0.021632 1.00000
7 0.000000 0.000000 0.000000 1.00000
8 0.000000 0.000000 0.000000 1.00000
9 0.000000 0.000000 0.000000 1.00000
10 -.000000 0.000000 -.000000 1.00000
11 -.000000 0.000000 -.000000 1.00000
12 -.000000 . -.000000 1.00000
Root-Mean-Square Total-Sample Standard Deviation = 0.430331
Root-Mean-Square Distance Between Observations = 2.108185
Number Frequency Normalized
of of New RMS
Clusters --Clusters Joined-- Cluster Distance Tie
9 OB1 OB2 2 0.000000
8 OB5 OB6 2 0.474342 T
7 OB3 OB8 2 0.474342 T
6 CL9 OB9 3 0.474342 T
5 CL7 OB7 3 0.580948
4 CL6 OB4 4 0.612372
3 CL4 CL8 6 0.821584
2 CL3 CL5 9 0.961769
1 CL2 OB10 10 1.500000
Average Linkage Cluster Analysis
Average Distance Between Clusters
1.6 1.4 1.2 1 0.8 0.6 0.4 0.2 0
+------+------+------+------+------+------+------+------+
N OB1 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
a XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
m XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
e OB2 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
f OB9 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.................
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
O XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
b OB4 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.....................
s XXXXXXXXXXXXXXXXXXXXXXXX
e XXXXXXXXXXXXXXXXXXXXXXXX
r OB5 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.................
v XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
a XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
t OB6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.................
i XXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXX
n OB3 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.................
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
o XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
r OB8 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.................
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
C XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
l OB7 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX....................
u X
s X
t OB10 X....................................................
Reference
- Lynch, M (1990). The similarity index and DNA fingerprinting. Mol Biol Evol, 7: 478–484.
- Lynch, M (1991). Analysis of population genetic structure by DNA fingerprinting. In: Burke T et al (eds) DNA Fingerprinting: Approaches and Applications. Birkhäuser Verlag: Basel, pp 113–126.