#!/usr/bin/perl

#build 20191105

use DBI; # DBI  Perl!!!
use URI;

#=======================CONFIGURATION BEGIN============================

#mysql default config
$host = "localhost"; # host s DB
$port = "3306"; # port DB
$user = "squid"; # username k DB
$pass = "ASxjC7Gftx8IvRfF"; # pasword k DB
$db = "squid"; # name DB

#==========================================================
#Path to access.log. It could be full path, e.g. /var/log/squid/access.log
#Put k access.log. Eto mozhet bit polnii put. Naprimer, /var/log/squid/access.log
#Путь к файлу access.log(имя может другим). Это может быть и полный путь, например, /var/log/squid/access.log

my $filetoparse="/var/log/squid/access.log";
#==========================================================

#How older data must be deleted. In example, older than 100 days from max date.
#Period, starshe kotorogo dannie budut udaliatsia. Ukazivaetsia v dniah.
#Период, старше которого данные будут удаляться. Указывается в днях.

my $deleteperiod=120; #days
#==========================================================
#min bytes of traffic in one record to write into DB. By default - all data stored.

my $minbytestoparse=-1; #bytes, default -1

#=======================CONFIGURATION END==============================

$count=0;
$lastdate=0;
$sqltext="";
$sql_getlastdate="";

$sqlbatch=100;
$overlap=300;

#datetime when parse started
print $now=localtime;
$startnow=time;

$dbh = DBI->connect("DBI:mysql:$db:$host:$port",$user,$pass);

$sth = $dbh->prepare("SELECT ifnull(unix_timestamp(max(access_date))-$overlap,0) from access_log");
$sth->execute;
@row=$sth->fetchrow_array;
$lastdate=$row[0];

if ($deleteperiod) {
  $sth = $dbh->prepare("delete from access_log where access_date <  date_sub(now(), interval $deleteperiod day)");
  $sth->execute;
}

#open log file for reading
open(IN, "<$filetoparse"); 

$countlines=0; 
$countadded=0;

print "\n";

$sqlspool = 0;
$sqltext = "";

#loop for get strings from file one by one.
while (my $line=<IN>) {

    if(time > $seconds+1) {
        $seconds=time;
        $insertspeed=$countinsert;
    }
    $countinsert++;

    @item = split " ", $line; 

    $bytes = $item[4];
   $time  = $item[0];
    if (($bytes>0) && ($time>$lastdate)) {
        $ms = $item[1];
        $client_ip = $item[2];
        $http_code = $item[3];
        $method = $item[5];
        $url = $item[6];
        @matches = ($url=~ /(?:^[a-z][a-z0-9+\-.]*:\/\/)?(?:[a-z0-9\-._~%!$&'()*+,;=]+(?::.*)?@)?([a-z0-9\-._~%]+|\[[a-z0-9\-._~%!$&'()*+,;=:]+\])/i);
        $host=$matches[0];
        $user = $item[7];
        $mime = $item[9];
        if ($sqltext) {
          $sqltext=$sqltext.",($time,$ms,'$client_ip','$http_code',$bytes,'$method','$host','$user','$mime')";
        } else {
          $sqltext="($time,$ms,'$client_ip','$http_code',$bytes,'$method','$host','$user','$mime')";
        }
        $sqlspool++;
        if ($sqlspool > $sqlbatch) {
          $sqltext  = "insert into tmp_traffic(timestamp,process_time,client_host,http_code,bytes,http_method,server_host,username,mime_type) values".$sqltext;
          $sth = $dbh->prepare($sqltext);
          $sth->execute;
          $sqlspool = 0;
          $sqltext = "";
        }
        print "Completed: ".$countlines." ".$insertspeed." lines/sec\r";
        $countlines++;
    }
    
}

if ($sqltext) {
  $sqltext  = "insert into tmp_traffic(timestamp,process_time,client_host,http_code,bytes,http_method,server_host,username,mime_type) values".$sqltext;
  $sth = $dbh->prepare($sqltext);
  $sth->execute;
}

print "\n";
close(IN);

$sth = $dbh->prepare("CALL Process_Traffic();");
$sth->execute;

$rc = $dbh->disconnect;
