1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
# Two-group descriptive statistics plus Welch t-test, printed as one
# delimiter-separated line on stdout.
#
# Command-line arguments (trailing only):
#   1: folder path; it is prepended verbatim to
#      "admin/survey/R/TempData/ttest_data.tmp", so it has to end with a
#      path separator.
#   2, 3: optional values `val1` / `val2` of the grouping variable. When
#      argument 3 is numerically greater than 1, columns 1 and 2 of the data
#      are recoded into 0/1 indicators for `val1` and `val2` respectively.
#      Otherwise columns 1 and 2 are used as they are (rows equal to 1 form
#      the groups).
#      NOTE(review): presumably the caller passes a value <= 1 (or nothing)
#      for checkbox questions -- confirm against the caller; a real `val2`
#      of 1 or less would skip the recoding.
#
# Input file: comma separated, no header, every column numeric. Column 3 is
# the measured value; values <= -1 in column 3 are treated as missing.
#
# Output: n_avg_sd_se_se^2_margin--n_avg_sd_se_se^2_margin--d_sed_t_sig

# Elementwise `x == value` that yields FALSE (never NA) for missing data or a
# missing / non-numeric `value`.
matches_value <- function(x, value) {
  !is.na(x) & !is.na(value) & x == value
}

# Descriptive statistics of one group. Undefined results (empty group, or a
# single observation for the standard deviation) are reported as 0.
describe_group <- function(x) {
  zero_if_undefined <- function(v) if (is.na(v)) 0 else v
  n <- length(x)
  avg <- zero_if_undefined(mean(x))
  std_dev <- zero_if_undefined(sd(x))
  std_err <- zero_if_undefined(std_dev / sqrt(n))
  list(
    n = n,
    avg = avg,
    sd = std_dev,
    se = std_err,
    se2 = std_err^2,
    # margin of error: 1.96 * standard error
    mar = 1.96 * std_err
  )
}

# Formats one group's statistics as n_avg_sd_se_se2_mar.
format_group <- function(g) {
  paste(g$n, g$avg, g$sd, g$se, g$se2, g$mar, sep = "_")
}

# read the parameters: 1 -> folder path
params <- commandArgs(trailingOnly = TRUE)
path <- params[1]

# read the pre-filtered data table
dataTable <- read.table(
  file = paste0(path, "admin/survey/R/TempData/ttest_data.tmp"),
  sep = ",",
  colClasses = "numeric",
  comment.char = "",
  quote = "",
  header = FALSE
)

# Command-line arguments arrive as character strings; convert them once so
# that every comparison below is numeric instead of a locale-dependent string
# comparison. Absent or non-numeric arguments become NA.
val1 <- suppressWarnings(as.numeric(params[2]))
val2 <- suppressWarnings(as.numeric(params[3]))

# Not a checkbox question: recode columns 1 and 2 into 0/1 indicators.
# Both masks are computed before anything is overwritten, so a target value
# of 0 is not clobbered by the recoding itself.
if (!is.na(val2) && val2 > 1) {
  is_val1 <- matches_value(dataTable[, 1], val1)
  is_val2 <- matches_value(dataTable[, 2], val2)
  dataTable[, 1] <- as.numeric(is_val1)
  dataTable[, 2] <- as.numeric(is_val2)
}

# drop every row whose numeric value (column 3) is missing
has_value <- !is.na(dataTable[, 3]) & dataTable[, 3] > -1
dataTable <- dataTable[has_value, , drop = FALSE]

# measured values of the first and of the second group
group1 <- dataTable[which(dataTable[, 1] == 1), 3]
group2 <- dataTable[which(dataTable[, 2] == 1), 3]

stats1 <- describe_group(group1)
stats2 <- describe_group(group2)

# Run the Welch t-test only when each group has at least two observations.
# The test is run on exactly the two groups summarised above, so the sign of
# the statistic always agrees with d = avg1 - avg2.
ttest <- NULL
if (stats1$n > 1 && stats2$n > 1) {
  # t.test() stops with "data are essentially constant" when both groups
  # have (near) zero variance; treat that like "test not possible".
  ttest <- tryCatch(
    t.test(group1, group2, var.equal = FALSE),
    error = function(e) NULL
  )
}

if (!is.null(ttest)) {
  # difference of the means
  d <- stats1$avg - stats2$avg
  # standard error of the difference (Welch): sqrt(se1^2 + se2^2)
  sed <- sqrt(stats1$se2 + stats2$se2)
  t_stat <- unname(ttest$statistic)
  # significance
  sig <- ttest$p.value
} else {
  d <- 0
  sed <- 0
  t_stat <- 0
  sig <- 0
}

# return the data as x1_x2_x3...--y1_y2...--d_sed_t_sig
cat(format_group(stats1))
cat("--")
cat(format_group(stats2))
cat("--")
cat(paste(d, sed, t_stat, sig, sep = "_"))
|