performance e SAS fullstimer option

Anni fa ero un fanatico programmatore di SAS, poi ho conosciuto R ed ho cambiato opinione. Capita…

Il codice R che qui presento permette di estrapolare da una coppia di log SAS le tempistiche e la memoria utilizzate dai datastep (forniti dall’opzione “fullstimer“).
Un comodo strumento per sintetizzare in un file .csv le performance di uno script eseguito su ambienti diversi.

?Download Rcode.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
 
 
#setwd("/Users/dariosolari/Documents/Rdocs/logSAS/")
 
SASLog1 <- readLines("TS_AZS_Reddito.log")
SASLog2 <- readLines("MI_AZS_Reddito.log")
 
 
########################
# Elaborazione SASLog1 #
########################
 
SASLog.datastep <- SASLog1[grep("real time",SASLog1)-1]
SASLog.datastep <- gsub("(.*NOTE[:][[:space:]])(.*)(used [(]Total process time[)][:]$)","\\2",SASLog.datastep,perl=TRUE)
 
SASLog1.realtime <- SASLog1[grep("real time",SASLog1)]
SASLog1.realtime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog1.realtime,perl=TRUE)
SASLog1.realtime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog1.realtime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.realtime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.realtime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.realtime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.realtime,perl=TRUE))*3600)
 
SASLog1.usertime <- SASLog1[grep("user cpu time",SASLog1)]
SASLog1.usertime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog1.usertime,perl=TRUE)
SASLog1.usertime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog1.usertime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.usertime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.usertime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.usertime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.usertime,perl=TRUE))*3600)
 
 
SASLog1.systemtime <- SASLog1[grep("system cpu time",SASLog1)]
SASLog1.systemtime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog1.systemtime,perl=TRUE)
SASLog1.systemtime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog1.systemtime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.systemtime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.systemtime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.systemtime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog1.systemtime,perl=TRUE))*3600)
 
SASLog1.memory <- SASLog1[grep("Memory",SASLog1)]
SASLog1.memory <- as.numeric(gsub("(.*Memory.*[[:space:]])([[:digit:]]*)([k]$)","\\2",SASLog1.memory,perl=TRUE))
 
# No record installazione
SASLog1.realtime <- SASLog1.realtime[(ifelse(length(SASLog1.realtime)==length(SASLog1.memory),1,2)):(length(SASLog1.realtime))]
SASLog1.usertime <- SASLog1.usertime[(ifelse(length(SASLog1.usertime)==length(SASLog1.memory),1,2)):(length(SASLog1.usertime))]
SASLog1.systemtime <- SASLog1.systemtime[(ifelse(length(SASLog1.systemtime)==length(SASLog1.memory),1,2)):(length(SASLog1.systemtime))]
 
 
 
 
########################
# Elaborazione SASLog2 #
########################
 
SASLog2.realtime <- SASLog2[grep("real time",SASLog2)]
SASLog2.realtime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog2.realtime,perl=TRUE)
SASLog2.realtime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog2.realtime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.realtime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.realtime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.realtime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.realtime,perl=TRUE))*3600)
 
SASLog2.usertime <- SASLog2[grep("user cpu time",SASLog2)]
SASLog2.usertime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog2.usertime,perl=TRUE)
SASLog2.usertime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog2.usertime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.usertime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.usertime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.usertime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.usertime,perl=TRUE))*3600)
 
 
SASLog2.systemtime <- SASLog2[grep("system cpu time",SASLog2)]
SASLog2.systemtime <- gsub("(.*[[:space:]])([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])([[:space:]].*)?","\\2",SASLog2.systemtime,perl=TRUE)
SASLog2.systemtime <- 
# secondi
as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][:])?([[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]]$)","\\2",SASLog2.systemtime,perl=TRUE)) +
# minuti
ifelse(nchar(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.systemtime,perl=TRUE),perl=TRUE))==0,0,as.numeric(gsub("([[:digit:]]?[[:digit:]]?[:]?)([[:digit:]][[:digit:]]$)","\\2",gsub("([:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.systemtime,perl=TRUE),perl=TRUE))*60) +
# ore
ifelse(nchar(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.systemtime,perl=TRUE))==0,0,as.numeric(gsub("([:]?[[:digit:]]?[[:digit:]]?[:]?[[:digit:]]?[[:digit:]][.][[:digit:]][[:digit:]])","",SASLog2.systemtime,perl=TRUE))*3600)
 
SASLog2.memory <- SASLog2[grep("Memory",SASLog2)]
SASLog2.memory <- as.numeric(gsub("(.*Memory.*[[:space:]])([[:digit:]]*)([k]$)","\\2",SASLog2.memory,perl=TRUE))
 
# No record installazione
SASLog2.realtime <- SASLog2.realtime[(ifelse(length(SASLog2.realtime)==length(SASLog2.memory),1,2)):(length(SASLog2.realtime))]
SASLog2.usertime <- SASLog2.usertime[(ifelse(length(SASLog2.usertime)==length(SASLog2.memory),1,2)):(length(SASLog2.usertime))]
SASLog2.systemtime <- SASLog2.systemtime[(ifelse(length(SASLog2.systemtime)==length(SASLog2.memory),1,2)):(length(SASLog2.systemtime))]
 
#######################
# Elaborazione Report #
#######################
 
CheckLogs <- data.frame(cbind(SASLog.datastep,SASLog1.realtime,SASLog2.realtime,SASLog1.usertime,SASLog2.usertime,SASLog1.systemtime,SASLog2.systemtime,SASLog1.memory,SASLog2.memory))
names(CheckLogs) <-c("datastep","realtime1","realtime2","usertime1","usertime2","systemtime1","systemtime2","memory1","memory2")
 
sintesi <- data.frame(cbind("TOTAL JOB",as.numeric(sum(SASLog1.realtime)),sum(SASLog2.realtime),sum(SASLog1.usertime),sum(SASLog2.usertime),sum(SASLog1.systemtime),sum(SASLog2.systemtime),sum(SASLog1.memory),sum(SASLog2.memory)))
names(sintesi) <-c("datastep","realtime1","realtime2","usertime1","usertime2","systemtime1","systemtime2","memory1","memory2")
 
CheckLogs <- rbind(CheckLogs,sintesi)
CheckLogs
write.csv(CheckLogs,"Logs.csv")