summaryrefslogtreecommitdiff
path: root/FS/bin/freeside-torrus-srvderive
blob: cd893b44e72d03bf10b5b1a4f36df16100968008 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#!/usr/bin/perl -w

use strict;
use POSIX qw( :sys_wait_h );
use Sys::SigAction qw( set_sig_handler );
use Date::Parse;
use Date::Format;
use IPC::Run qw( run ); #timeout );
use FS::Daemon ':all'; #daemonize1 drop_root daemonize2 myexit logfile sig*
use FS::UID qw( adminsuidsetup forksuidsetup dbh driver_name );
use FS::Record qw( qsearch str2time_sql str2time_sql_closing concat_sql );
use FS::torrus_srvderive;

# --- tunables / shared state -------------------------------------------------
our $DEBUG = 2;                 # >0: progress messages, >2: also dump SQL
our $max_kids = 4;              # maximum number of concurrently forked workers
our $search_timeout = 2*60*60; #60*60; #30*60; # 15*60 5*60; #$torrus_srvderive->last_srv_date ? 5*60 : 15*60);
our %kids;                      # pid => 1 for every worker not yet reaped

# --- daemon startup ----------------------------------------------------------
# The only command-line argument is the Freeside user; usage() dies itself.
my $user = shift or die usage();
$FS::Daemon::PID_NEWSTYLE = 1;
daemonize1('torrus-srvderive');

drop_root();

adminsuidsetup($user);

logfile( "%%%FREESIDE_LOG%%%/torrus-srvderive-log.". $FS::UID::datasrc );

daemonize2();

our $conf = FS::Conf->new;

die "not running: network_monitoring_system not Torrus_Internal\n"
  unless _shouldrun();

#--

# Build the SQL expression that turns "<alias>.srv_date <alias>.srv_time" into
# an epoch value, using the FS::Record helpers (plus an explicit CAST on Pg).
my $epoch_sql_for = sub {
  my $alias = shift;
  my $expr = concat_sql([ "$alias.srv_date", "' '", "$alias.srv_time" ]);
  $expr = "CAST( $expr AS TIMESTAMP )" if driver_name =~ /^Pg/i;
  return str2time_sql(). $expr. str2time_sql_closing();
};

my $_date      = $epoch_sql_for->('srvexport');
my $other_date = $epoch_sql_for->('other');

# Placeholders for "<serviceid>_IN" / "<serviceid>_OUT"; the serviceid itself
# is bound twice at execute time.
my $in  = concat_sql([ '?', "'_IN'" ]);
my $out = concat_sql([ '?', "'_OUT'" ]);

# Base search: sample times for which no derived (_IN/_OUT) row of the bound
# serviceid exists on the same srv_date within 60 seconds.
my $sql = "
  SELECT DISTINCT srv_date, srv_time FROM srvexport
    WHERE NOT EXISTS (
                       SELECT 1 FROM srvexport AS other
                         WHERE other.serviceid IN ( $in, $out )
                           AND srvexport.srv_date = other.srv_date
                           AND ABS( $_date - $other_date ) <= 60
                     )
";

my $orderlimit = "
    ORDER BY srv_date, srv_time
    LIMIT 200
";

if ( driver_name =~ /mysql/i ) {
  dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED');
  dbh->commit or die dbh->errstr;
}

our $kids = 0;                  # count of live workers, kept alongside %kids

# Main daemon loop: repeatedly walk every torrus_srvderive record and fork a
# worker ("kid") per record, with at most $max_kids workers alive at a time.
# The parent only leaves this loop through myexit() (SIGTERM/SIGINT) or die.
#MAIN: while (1) {
while (1) {

  # NOTE(review): $found is only incremented inside forked kids, so the
  # parent's copy never changes; it is only referenced by the disabled
  # "sleep 60 unless $found" at the bottom of the loop.
  my $found = 0;

  #SERVICEID: foreach my $torrus_srvderive ( qsearch('torrus_srvderive', {}) ) {
  foreach my $torrus_srvderive ( qsearch('torrus_srvderive', {}) ) {

    #collect finished kids; if every slot is still busy, wait and retry this
    #same record (redo restarts the body without advancing the foreach)
    reap_kids();
    if ( $kids >= $max_kids ) {
      sleep 5;
      myexit() if sigterm() || sigint();
      redo;
    }

    defined( my $pid = fork ) or do {
      #warn "WARNING: can't fork: $!\n";
      #next; #don't increment the kid counter
      die "can't fork: $!\n";
    };

    if ( $pid ) {
      #parent: just account for the new kid
      $kids++;
      $kids{$pid} = 1;
    } else { #kid time

      #get new db handle
      #(InactiveDestroy keeps the inherited handle from being torn down here)
      $FS::UID::dbh->{InactiveDestroy} = 1;

      forksuidsetup($user);

      my $serviceid = $torrus_srvderive->serviceid;

      my @serviceids = $torrus_srvderive->component_serviceids;
      exit unless @serviceids; #don't try to search for empty virtual ports

      #nonlocking select statements; rows in this table never change
      if ( driver_name =~ /mysql/i ) {
        dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED');
        dbh->commit or die dbh->errstr;
      }

      #quoted list of every component serviceid in both directions
      #(this $in shadows the file-level placeholder fragment of the same name)
      my @in = ();
      for my $dir ('_IN', '_OUT') {
        push @in, map dbh->quote("$_$dir"), @serviceids;
      }
      my $in = join(',', @in);

      if ( ! $torrus_srvderive->last_srv_date ) {
        warn "finding initial last_srv_date for $serviceid\n" if $DEBUG;
        my $dsql = "SELECT srv_date FROM srvexport WHERE serviceid IN ($in)
                    ORDER BY srv_date LIMIT 1";
        my $dsth = dbh->prepare($dsql) or die $DBI::errstr;
        $dsth->execute or die $dsth->errstr;
        #fetchrow_arrayref returns undef when there are no rows; check before
        #dereferencing so the "skipping" branch below is actually reachable
        my $drow = $dsth->fetchrow_arrayref;
        my $date = $drow ? $drow->[0] : undef;
        if ( $date ) {
          warn "found initial last_srv_date of $date; updating $serviceid\n"
            if $DEBUG;
          $torrus_srvderive->last_srv_date($date);
          my $error = $torrus_srvderive->replace;
          die $error if $error;
        } else {
          warn "no initial last_srv_date for $serviceid; skipping\n" if $DEBUG;
          exit;
        }
      }

      #alternate (hopefully more efficient) path: if we're more than a day
      #behind, call native torrus srvderive
      my $last_srv_date = $torrus_srvderive->last_srv_date;
      my $currdate = time2str('%Y-%m-%d', time);
      if ( $last_srv_date ne $currdate
           && time - str2time($last_srv_date) > (24+6)*60*60 #6 hour window for
         ) {                                                 #yesterday
        warn "serviceid $serviceid last_srv_date != $currdate\n" if $DEBUG;

        #delete records from last_srv_date
        my $dsql =
          'DELETE FROM srvexport WHERE srv_date = ? AND serviceid IN(?,?)';
        my $dsth = dbh->prepare($dsql) or die $DBI::errstr;
        warn "$dsql $last_srv_date ${serviceid}_IN ${serviceid}_OUT\n"
          if $DEBUG;
        $dsth->execute($last_srv_date, $serviceid.'_IN', $serviceid.'_OUT')
          or die $dsth->errstr;
        dbh->commit or die dbh->errstr;

        foreach my $dir ('_IN','_OUT') {

          #XX TORRUS_PATH
          my @cmd = ( '/usr/local/bin/torrus', 'srvderive',
                        "--start=$last_srv_date", "--end=$currdate",
                        "--out=$serviceid$dir", "--func=SUM",
                        map "$_$dir", @serviceids
                    );

          warn "calling native srvderive: ". join(' ', @cmd). "\n" if $DEBUG;
          my $out_and_err = '';
          run( \@cmd, '>&', \$out_and_err ) #verbose?  timeout?
            or die "error calling native srvder: $out_and_err\n";

        }
        warn "native srvderive done\n" if $DEBUG;

        $torrus_srvderive->last_srv_date($currdate);
        my $error = $torrus_srvderive->replace;
        die $error if $error;

        #don't bother trying today's immediately
        warn "done with $serviceid for now\n" if $DEBUG;
        exit; #end-of-kid

      }

      #base search (no derived row yet) restricted to times at which at least
      #one component serviceid does have a sample within 60 seconds
      my $ssql = "
        $sql AND EXISTS (
                         SELECT 1 FROM srvexport AS other
                           WHERE other.serviceid IN ($in)
                             AND srvexport.srv_date = other.srv_date
                             AND ABS( $_date - $other_date ) <= 60
                       )
      ";

      $ssql .= " AND srv_date >= '". $torrus_srvderive->last_srv_date. "' "
        if $torrus_srvderive->last_srv_date;

      $ssql .= $orderlimit;

      warn "searching for times to add $serviceid\n" if $DEBUG;
      warn $ssql if $DEBUG > 2;
      my $sth = dbh->prepare($ssql) or die $DBI::errstr; #better recovery here?

      #run the search under an alarm so it is abandoned after $search_timeout
      #seconds; the two bound values fill the _IN/_OUT placeholders in $sql
      eval {
        my $h = set_sig_handler( 'ALRM', sub { die "_timeout\n"; } );
        alarm($search_timeout);
        $sth->execute($serviceid, $serviceid) or die $sth->errstr;
        alarm(0);
      };
      alarm(0); #also cancel the alarm if the eval died for another reason
      
      if ( $@ && $@ eq "_timeout\n" ) {
        #warn "search timed out; reconnecting and restarting\n";
        warn "search timed out\n";
        #on mysql, kill the still-running query from a second connection
        dbh->clone()->do("KILL QUERY ". dbh->{"mysql_thread_id"})
          if driver_name =~ /mysql/i;
        dbh->rollback; #or die dbh->errstr;
        #adminsuidsetup($user);
        #next SERVICEID; #MAIN;
        exit;
      } elsif ( $@ ) {
        die $@;
      }

      warn "search for $serviceid finished; checking results\n" if $DEBUG;

      my $prev = 0; #epoch time of the last sample we added
      while ( my $row = $sth->fetchrow_arrayref ) {
        last if sigterm() || sigint();

        my( $srv_date, $srv_time ) = @$row;
        my $cur = str2time( "$srv_date $srv_time" );
        next if $cur-$prev <= 60;   #within 60s of the sample just handled
        last if time - $cur <= 300; #stop at samples from the last 300s

        warn "no $serviceid for $srv_date $srv_time; adding\n"
          if $DEBUG;
        $found++;

        for my $dir ('_IN', '_OUT') {

          my $sin = join(',', map dbh->quote("$_$dir"), @serviceids);

          #shadows the outer $srv_date inside this loop only
          my $srv_date = time2str('%Y-%m-%d', $cur);

          #sum the component values within 60 seconds of this sample
          my $sum = "
            SELECT COALESCE(SUM(value),0) FROM srvexport AS other
              WHERE other.serviceid IN ($sin)
                AND other.srv_date = '$srv_date'
                AND ABS( $cur - $other_date ) <= 60
          ";
          my $ssth = dbh->prepare($sum) or die $DBI::errstr;
          $ssth->execute or die $ssth->errstr; #better recovery?
          my $value = $ssth->fetchrow_arrayref->[0];

          my $isql = "
            INSERT INTO srvexport (srv_date, srv_time, serviceid, value, intvl)
              VALUES ( ?, ?, ?, ?, ? )
          ";
          my @param = ( $srv_date,
                        time2str('%X', $cur), #srv_time
                        "$serviceid$dir",     #serviceid
                        $value,
                        300,                  #intvl ... 
                      );
          warn $isql. ' with param '. join(',',@param). "\n"
            if $DEBUG > 2;

          my $isth = dbh->prepare($isql) or die $DBI::errstr; #better recovery?

          #stupid mysql deadlocks all the time on insert, so we need to recover
          unless ( $isth->execute(@param) ) {
            #warn "Error inserting data for $serviceid$dir (restarting): ".
            #     $isth->errstr;
            warn "Error inserting data for $serviceid$dir: ". $isth->errstr;
            dbh->rollback; #or die dbh->errstr;
            #sleep 5;
            #next SERVICEID; #MAIN;
            exit;
          }
                          
        }

        #record progress, then commit both inserts and the update together
        if ( $srv_date ne $torrus_srvderive->last_srv_date ) {
          warn "updating last_srv_date of $serviceid to $srv_date\n" if $DEBUG;
          $torrus_srvderive->last_srv_date($srv_date);
          my $error = $torrus_srvderive->replace;
          die $error if $error;
        }
        dbh->commit or die dbh->errstr;

        $prev = $cur;
      }
      warn "done with $serviceid\n" if $DEBUG;

      exit;
      #end-of-kid
    }

  } #foreach my $torrus_srvderive
  dbh->commit or die dbh->errstr;

  myexit() if sigterm() || sigint();
  warn "restarting main loop\n" if $DEBUG > 1;
  #sleep 60 unless $found;
}

# True only when the 'network_monitoring_system' config item exists and is
# set to 'Torrus_Internal'; reads the file-level $conf object.
sub _shouldrun {
  my $item = 'network_monitoring_system';

  my $is_set = $conf->exists($item);
  return $is_set unless $is_set;

  return $conf->config($item) eq 'Torrus_Internal';
}

# Abort with the command-line usage message.  Never returns: it dies itself,
# so callers writing "die usage()" get this message, not a second one.
sub usage {
  die "Usage:\n\n  freeside-torrus-srvderive user\n";
}

# Non-blocking sweep over every tracked worker pid in %kids.  Any pid that is
# no longer running is dropped from %kids and the live-worker counter $kids
# is decremented, freeing a slot for the main loop.
sub reap_kids {
  foreach my $pid ( keys %kids ) {
    my $status = waitpid($pid, WNOHANG);

    # 0: the child is still running, keep tracking it
    next if $status == 0;

    # >0: the child was just reaped.
    # -1: there is no such child to wait for (e.g. it was already reaped);
    #     keeping the entry would leave its slot occupied indefinitely.
    $kids-- if $kids > 0;
    delete $kids{$pid};
  }
}

=head1 NAME

freeside-torrus-srvderive - Freeside's Torrus virtual port daemon.

=head1 SYNOPSIS

  freeside-torrus-srvderive user

=head1 DESCRIPTION

Runs continuously, searches for samples in the srvexport table which do not
have an entry for combined virtual ports, and adds them.

=head1 SEE ALSO

=cut

1;