Commit f60cfa7c by luc.moulinier

parent 1f49e338
 NIST PCA Example

NIST PCA Example

# A Principal Components Analysis example
#
# walk through the NIST/Sematech PCA example data section 6.5
# http://www.itl.nist.gov/div898/handbook/
#
# This file is part of the Hume La Package
# (C)Copyright 2001, Hume Integration Software
#
# These calculations use the Hume Linear Algebra package, La
#
package require La
catch { namespace import La::*}

# Look at the bottom of this file to see the console output of this procedure.

proc nist_pca {} {
puts {Matrix of observation data X[10,3]}
set X {2 10 3 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2}
puts [show \$X]
puts {Compute column norms}
mnorms_br X Xmeans Xsigmas
puts "Means=[show \$Xmeans]"
puts "Sigmas=[show \$Xsigmas %.6f]"
puts {Normalize observation data creating Z[10,3]}
puts [show [set Z [mnormalize \$X \$Xmeans \$Xsigmas]] %10.6f]\n
puts {Naive computation method that is sensitive to round-off error}
puts {ZZ is (Ztranspose)(Z) }
puts "ZZ=\n[show [set ZZ [mmult [transpose \$Z] \$Z]] %10.6f]"
puts {the correlation matrix, R = (Zt)(Z)/(n-1)}
puts "R=\n[show [set R [mscale \$ZZ [expr 1/9.0]]] %8.4f]"
puts {Continuing the naive example use R for the eigensolution}
set V \$R
mevsvd_br V evals
puts "eigenvalues=[show \$evals %8.4f]"
puts "eigenvectors:\n[show \$V %8.4f]\n"
puts "Good computation method- perform SVD of Z"
set U \$Z
msvd_br U S V
puts "Singular values, S=[show \$S]"
puts "square S and divide by (number of observations)-1 to get the eigenvalues"
set SS [mprod \$S \$S]
set evals [mscale \$SS [expr 1.0/9.0]]
puts "eigenvalues=[show \$evals %8.4f]"
puts "eigenvectors:\n[show \$V %8.4f]"
puts "(the 3rd vector/column has inverted signs, thats ok the math still works)"
puts "\nverify V x transpose = I"
puts "As computed\n[show [mmult \$V [transpose \$V]] %20g]\n"
puts "Rounded off\n[show [mround [mmult \$V [transpose \$V]]]]"
#puts "Diagonal matrix of eigenvalues, L"
#set L [mdiag \$evals]
#

puts "\nsquare root of eigenvalues as a diagonal matrix"
proc sqrt x { return [expr sqrt(\$x)] }
set evals_sr [mat_unary_op \$evals sqrt]
set Lhalf [mdiag \$evals_sr]
puts [show \$Lhalf %8.4f]

puts "Factor Structure Matrix S"
set S [mmult \$V \$Lhalf]
puts [show \$S %8.4f]\n
set S2 [mrange \$S 0 1]
puts "Use first two eigenvalues\n[show \$S2 %8.4f]"
# Nash discusses better ways to compute this than multiplying SSt
# the NIST method is sensitive to round-off error
set S2S2 [mmult \$S2 [transpose \$S2]]
puts "SSt for first two components, communality is the diagonal"
puts [show \$S2S2 %8.4f]\n

# define reciprocal square root function
proc recipsqrt x { return [expr 1.0/sqrt(\$x)] }
# use it for Lrhalf calculation
set Lrhalf [mdiag [mat_unary_op \$evals recipsqrt]]

set B [mmult \$V \$Lrhalf]
puts "Factor score coefficients B Matrix:\n[show \$B %12.4f]"

puts "NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)"

puts "\nWork with first normalized observation row"
set z1 {2 3 0 0.065621795889 0.316227766017 -0.7481995314}
puts [show \$z1 %.4f]

puts "compute the \"factor scores\" from multiplying by B"
set t [mmult [transpose \$z1] \$B]
puts [show \$t %.4f]
puts "NIST implies you might chart these"

#set T2 [dotprod \$t \$t]
#puts "Hotelling T2=[format %.4f \$T2]"

puts "Compute the scores using V, these are more familiar"
set t [mmult [transpose \$z1] \$V]
puts [show \$t %.4f]
puts "Calculate T2 from the scores, sum ti*ti/evi"

set T2 [msum [transpose [mdiv [mprod \$t \$t] [transpose \$evals]]]]
puts "Hotelling T2=[format %.4f \$T2]"

puts "Fit z1 using first two principal components"
set p [mrange \$V 0 1]
puts p=\n[show \$p %10.4f]
set t [mmult [transpose \$z1] \$p]
set zhat [mmult \$t [transpose \$p]]
puts "  z1=[show \$z1 %10.6f]"
puts "zhat=[show \$zhat %10.6f]"
set diffs [msub [transpose \$z1] \$zhat]
puts "diff=[show \$diffs %10.6f]"
puts "Q statistic - distance from the model measurement for the first observation"
set Q [dotprod \$diffs \$diffs]
puts Q=[format %.4f \$Q]
puts "Some experts would compute a corrected Q that is larger since the
observation was used in building the model.  The Q calculation just used
properly applies to the analysis of new observations."
set corr [expr 10.0/(10.0 - 2 -1)]  ;# N/(N - Ncomp - 1)
puts "Corrected Q=[format %.4f [expr \$Q * \$corr]] (factor=[format %.4f \$corr])"
}

return
##########################################################################
Console Printout
##########################################################################
Matrix of observation data X[10,3]
7 4 3
4 1 8
6 3 5
8 6 1
8 5 7
7 2 9
5 3 3
9 5 8
7 4 5
8 2 2
Compute column norms
Means=6.9 3.5 5.1
Sigmas=1.523884 1.581139 2.806738
Normalize observation data creating Z[10,3]
0.065622   0.316228  -0.748200
-1.903032  -1.581139   1.033228
-0.590596  -0.316228  -0.035629
0.721840   1.581139  -1.460771
0.721840   0.948683   0.676942
0.065622  -0.948683   1.389513
-1.246814  -0.316228  -0.748200
1.378058   0.948683   1.033228
0.065622   0.316228  -0.035629
0.721840  -0.948683  -1.104485

Naive computation method that is sensitive to round-off error
ZZ is (Ztranspose)(Z)
ZZ=
9.000000   6.017916  -0.911824
6.017916   9.000000  -2.591349
-0.911824  -2.591349   9.000000
the correlation matrix, R = (Zt)(Z)/(n-1)
R=
1.0000   0.6687  -0.1013
0.6687   1.0000  -0.2879
-0.1013  -0.2879   1.0000
Continuing the naive example use R for the eigensolution
eigenvalues=  1.7688   0.9271   0.3041
eigenvectors:
0.6420   0.3847  -0.6632
0.6864   0.0971   0.7207
-0.3417   0.9179   0.2017

Good computation method- perform SVD of Z
Singular values, S=3.98985804571 2.88854344819 1.65449373616
square S and divide by (number of observations)-1 to get the eigenvalues
eigenvalues=  1.7688   0.9271   0.3041
eigenvectors:
0.6420   0.3847   0.6632
0.6864   0.0971  -0.7207
-0.3417   0.9179  -0.2017
(the 3rd vector/column has inverted signs, thats ok the math still works)

verify V x transpose = I
As computed
1        -5.55112e-017         1.38778e-016
-5.55112e-017                    1         5.55112e-017
1.38778e-016         5.55112e-017                    1

Rounded off
1.0 0.0 0.0
0.0 1.0 0.0
0.0 0.0 1.0

square root of eigenvalues as a diagonal matrix
1.3300   0.0000   0.0000
0.0000   0.9628   0.0000
0.0000   0.0000   0.5515
Factor Structure Matrix S
0.8538   0.3704   0.3658
0.9128   0.0935  -0.3975
-0.4544   0.8838  -0.1112

Use first two eigenvalues
0.8538   0.3704
0.9128   0.0935
-0.4544   0.8838
SSt for first two components, communality is the diagonal
0.8662   0.8140  -0.0606
0.8140   0.8420  -0.3321
-0.0606  -0.3321   0.9876

Factor score coefficients B Matrix:
0.4827       0.3995       1.2026
0.5161       0.1009      -1.3069
-0.2569       0.9533      -0.3657
NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)

Work with first normalized observation row
0.0656 0.3162 -0.7482
compute the "factor scores" from multiplying by B
0.3871 -0.6552 -0.0608
NIST implies you might chart these
Compute the scores using V, these are more familiar
0.5148 -0.6308 -0.0335
Calculate T2 from the scores, sum ti*ti/evi
Hotelling T2=0.5828
Fit z1 using first two principal components
p=
0.6420     0.3847
0.6864     0.0971
-0.3417     0.9179
z1=  0.065622   0.316228  -0.748200
zhat=  0.087847   0.292075  -0.754958
diff= -0.022225   0.024153   0.006758
Q statistic - distance from the model measurement for the first observation
Q=0.0011
Some experts would compute a corrected Q that is larger since the
observation was used in building the model.  The Q calculation just used
properly applies to the analysis of new observations.
Corrected Q=0.0016 (factor=1.4286)
\ No newline at end of file
 # A Principal Components Analysis example # # walk through the NIST/Sematech PCA example data section 6.5 # http://www.itl.nist.gov/div898/handbook/ # # This file is part of the Hume La Package # (C)Copyright 2001, Hume Integration Services # # These calculations use the Hume Linear Algebra package, La # package require La catch { namespace import La::*} # Look at the bottom of this file to see the console output of this procedure. proc nist_pca {} { puts {Matrix of observation data X[10,3]} set X {2 10 3 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2} puts [show \$X] puts {Compute column norms} mnorms_br X Xmeans Xsigmas puts "Means=[show \$Xmeans]" puts "Sigmas=[show \$Xsigmas %.6f]" puts {Normalize observation data creating Z[10,3]} puts [show [set Z [mnormalize \$X \$Xmeans \$Xsigmas]] %10.6f]\n puts {Naive computation method that is sensitive to round-off error} puts {ZZ is (Ztranspose)(Z) } puts "ZZ=\n[show [set ZZ [mmult [transpose \$Z] \$Z]] %10.6f]" puts {the correlation matrix, R = (Zt)(Z)/(n-1)} puts "R=\n[show [set R [mscale \$ZZ [expr 1/9.0]]] %8.4f]" puts {Continuing the naive example use R for the eigensolution} set V \$R mevsvd_br V evals puts "eigenvalues=[show \$evals %8.4f]" puts "eigenvectors:\n[show \$V %8.4f]\n" puts "Good computation method- perform SVD of Z" set U \$Z msvd_br U S V puts "Singular values, S=[show \$S]" puts "square S and divide by (number of observations)-1 to get the eigenvalues" set SS [mprod \$S \$S] set evals [mscale \$SS [expr 1.0/9.0]] puts "eigenvalues=[show \$evals %8.4f]" puts "eigenvectors:\n[show \$V %8.4f]" puts "(the 3rd vector/column has inverted signs, thats ok the math still works)" puts "\nverify V x transpose = I" puts "As computed\n[show [mmult \$V [transpose \$V]] %20g]\n" puts "Rounded off\n[show [mround [mmult \$V [transpose \$V]]]]" #puts "Diagonal matrix of eigenvalues, L" #set L [mdiag \$evals] # puts "\nsquare root of eigenvalues as a diagonal matrix" proc sqrt x { return [expr sqrt(\$x)] } set evals_sr [mat_unary_op \$evals sqrt] set Lhalf [mdiag \$evals_sr] puts [show \$Lhalf %8.4f] puts "Factor Structure Matrix S" set S [mmult \$V \$Lhalf] puts [show \$S %8.4f]\n set S2 [mrange \$S 0 1] puts "Use first two eigenvalues\n[show \$S2 %8.4f]" # Nash discusses better ways to compute this than multiplying SSt # the NIST method is sensitive to round-off error set S2S2 [mmult \$S2 [transpose \$S2]] puts "SSt for first two components, communality is the diagonal" puts [show \$S2S2 %8.4f]\n # define reciprocal square root function proc recipsqrt x { return [expr 1.0/sqrt(\$x)] } # use it for Lrhalf calculation set Lrhalf [mdiag [mat_unary_op \$evals recipsqrt]] set B [mmult \$V \$Lrhalf] puts "Factor score coefficients B Matrix:\n[show \$B %12.4f]" puts "NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo)" puts "\nWork with first normalized observation row" set z1 {2 3 0 0.065621795889 0.316227766017 -0.7481995314} puts [show \$z1 %.4f] puts "compute the \"factor scores\" from multiplying by B" set t [mmult [transpose \$z1] \$B] puts [show \$t %.4f] puts "NIST implies you might chart these" #set T2 [dotprod \$t \$t] #puts "Hotelling T2=[format %.4f \$T2]" puts "Compute the scores using V, these are more familiar" set t [mmult [transpose \$z1] \$V] puts [show \$t %.4f] puts "Calculate T2 from the scores, sum ti*ti/evi" set T2 [msum [transpose [mdiv [mprod \$t \$t] [transpose \$evals]]]] puts "Hotelling T2=[format %.4f \$T2]" puts "Fit z1 using first two principal components" set p [mrange \$V 0 1] puts p=\n[show \$p %10.4f] set t [mmult [transpose \$z1] \$p] set zhat [mmult \$t [transpose \$p]] puts " z1=[show \$z1 %10.6f]" puts "zhat=[show \$zhat %10.6f]" set diffs [msub [transpose \$z1] \$zhat] puts "diff=[show \$diffs %10.6f]" puts "Q statistic - distance from the model measurement for the first observation" set Q [dotprod \$diffs \$diffs] puts Q=[format %.4f \$Q] puts "Some experts would compute a corrected Q that is larger since the observation was used in building the model. The Q calculation just used properly applies to the analysis of new observations." set corr [expr 10.0/(10.0 - 2 -1)] ;# N/(N - Ncomp - 1) puts "Corrected Q=[format %.4f [expr \$Q * \$corr]] (factor=[format %.4f \$corr])" } return ########################################################################## Console Printout ########################################################################## Matrix of observation data X[10,3] 7 4 3 4 1 8 6 3 5 8 6 1 8 5 7 7 2 9 5 3 3 9 5 8 7 4 5 8 2 2 Compute column norms Means=6.9 3.5 5.1 Sigmas=1.523884 1.581139 2.806738 Normalize observation data creating Z[10,3] 0.065622 0.316228 -0.748200 -1.903032 -1.581139 1.033228 -0.590596 -0.316228 -0.035629 0.721840 1.581139 -1.460771 0.721840 0.948683 0.676942 0.065622 -0.948683 1.389513 -1.246814 -0.316228 -0.748200 1.378058 0.948683 1.033228 0.065622 0.316228 -0.035629 0.721840 -0.948683 -1.104485 Naive computation method that is sensitive to round-off error ZZ is (Ztranspose)(Z) ZZ= 9.000000 6.017916 -0.911824 6.017916 9.000000 -2.591349 -0.911824 -2.591349 9.000000 the correlation matrix, R = (Zt)(Z)/(n-1) R= 1.0000 0.6687 -0.1013 0.6687 1.0000 -0.2879 -0.1013 -0.2879 1.0000 Continuing the naive example use R for the eigensolution eigenvalues= 1.7688 0.9271 0.3041 eigenvectors: 0.6420 0.3847 -0.6632 0.6864 0.0971 0.7207 -0.3417 0.9179 0.2017 Good computation method- perform SVD of Z Singular values, S=3.98985804571 2.88854344819 1.65449373616 square S and divide by (number of observations)-1 to get the eigenvalues eigenvalues= 1.7688 0.9271 0.3041 eigenvectors: 0.6420 0.3847 0.6632 0.6864 0.0971 -0.7207 -0.3417 0.9179 -0.2017 (the 3rd vector/column has inverted signs, thats ok the math still works) verify V x transpose = I As computed 1 -5.55112e-017 1.38778e-016 -5.55112e-017 1 5.55112e-017 1.38778e-016 5.55112e-017 1 Rounded off 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 square root of eigenvalues as a diagonal matrix 1.3300 0.0000 0.0000 0.0000 0.9628 0.0000 0.0000 0.0000 0.5515 Factor Structure Matrix S 0.8538 0.3704 0.3658 0.9128 0.0935 -0.3975 -0.4544 0.8838 -0.1112 Use first two eigenvalues 0.8538 0.3704 0.9128 0.0935 -0.4544 0.8838 SSt for first two components, communality is the diagonal 0.8662 0.8140 -0.0606 0.8140 0.8420 -0.3321 -0.0606 -0.3321 0.9876 Factor score coefficients B Matrix: 0.4827 0.3995 1.2026 0.5161 0.1009 -1.3069 -0.2569 0.9533 -0.3657 NIST shows B with a mistake, -.18 for -1.2 (-1.18 with a typo) Work with first normalized observation row 0.0656 0.3162 -0.7482 compute the "factor scores" from multiplying by B 0.3871 -0.6552 -0.0608 NIST implies you might chart these Compute the scores using V, these are more familiar 0.5148 -0.6308 -0.0335 Calculate T2 from the scores, sum ti*ti/evi Hotelling T2=0.5828 Fit z1 using first two principal components p= 0.6420 0.3847 0.6864 0.0971 -0.3417 0.9179 z1= 0.065622 0.316228 -0.748200 zhat= 0.087847 0.292075 -0.754958 diff= -0.022225 0.024153 0.006758 Q statistic - distance from the model measurement for the first observation Q=0.0011 Some experts would compute a corrected Q that is larger since the observation was used in building the model. The Q calculation just used properly applies to the analysis of new observations. Corrected Q=0.0016 (factor=1.4286) \ No newline at end of file
lib/la1.0/defs 0 → 100755
 # This file contains support code for the Tcl test suite. It is # normally sourced by the individual files in the test suite before # they run their tests. This improved approach to testing was designed # and initially implemented by Mary Ann May-Pumphrey of Sun Microsystems. # This file is NOT Copyright by Hume Integration # set VERBOSE 0 set TESTS {} set auto_noexec 1 # If tests are being run as root, issue a warning message and set a # variable to prevent some tests from running at all. set user {} catch {set user [exec whoami]} # Some of the tests don't work on some system configurations due to # configuration quirks, not due to Tcl problems; in order to prevent # false alarms, these tests are only run in the master source directory # at Berkeley. The presence of a file "Berkeley" in this directory is # used to indicate that these tests should be run. set atBerkeley [file exists Berkeley] proc print_verbose {test_name test_description contents_of_test code answer} { puts stdout "\n" puts stdout "==== \$test_name \$test_description" puts stdout "==== Contents of test case:" puts stdout "\$contents_of_test" if {\$code != 0} { if {\$code == 1} { puts stdout "==== Test generated error:" puts stdout \$answer } elseif {\$code == 2} { puts stdout "==== Test generated return exception; result was:" puts stdout \$answer } elseif {\$code == 3} { puts stdout "==== Test generated break exception" } elseif {\$code == 4} { puts stdout "==== Test generated continue exception" } else { puts stdout "==== Test generated exception \$code; message was:" puts stdout \$answer } } else { puts stdout "==== Result was:" puts stdout "\$answer" } } proc test {test_name test_description contents_of_test passing_results} { global VERBOSE global TESTS if {[string compare \$TESTS ""] != 0} then { set ok 0 foreach test \$TESTS { if [string match \$test \$test_name] then { set ok 1 break } } if !\$ok then return } set code [catch {uplevel \$contents_of_test} answer] if {\$code != 0} { print_verbose \$test_name \$test_description \$contents_of_test \ \$code \$answer } elseif {[string compare \$answer \$passing_results] == 0} then { if \$VERBOSE then { print_verbose \$test_name \$test_description \$contents_of_test \ \$code \$answer puts stdout "++++ \$test_name PASSED" } } else { print_verbose \$test_name \$test_description \$contents_of_test \$code \ \$answer puts stdout "---- Result should have been:" puts stdout "\$passing_results" puts stdout "---- \$test_name FAILED" } } proc dotests {file args} { global TESTS set savedTests \$TESTS set TESTS \$args source \$file set TESTS \$savedTests }
 La Package Documentation
lib/la1.0/la.html 0 → 100755
This source diff could not be displayed because it is too large. You can view the blob instead.
lib/la1.0/la.tcl 0 → 100755
This diff is collapsed.
lib/la1.0/la.test 0 → 100755
This diff is collapsed.