Tuesday, February 5, 2008

Getting Squid Access report-SARG

This is a test that I have done for the john keels system integrator keels business system ltd squid server,waiting for permisson to launch it in real.I configure /etc/squid/squid.conf and start squid service.Set proxy address to the squid server ip 192.168.10.110 and the port 3128.Internet Access Monitor for Squid, is monitoring the efficiency of a company Internet bandwidth usage. Using this feature we can easily find out who, when, where to, where from and what accessed the Internet.Here I use sarg,Squid Analysis Report Generator a tool that allows you to view "where" your users are going to on the Internet.Sarg generate reports in html, with many fields, like: users, IP Addresses, bytes, sites and times.
I Downloaded sarg-2.2.1-1.el4.rf.i386.rpm file for RHEL4 edition from:
http://dag.wieers.com/rpm/packages/sarg/.

1.Install Sarg. Configuration file is /etc/sarg.conf. You may modify it as you wish. You need to create output directory as described in sarg.conf.
# rpm –ivh sarg-2.2.1-1.el4.rf.i386.rpm
# mkdir /srv/www/htdocs/squid-reports

2.Run Sarg daily. Edit /etc/crontab and add following line.
0 4 * * * root /usr/bin/sarg >/dev/null 2>&1

3.You can see the analysis report from http://hostname/squid-reports

After installing I configure /etc/sarg/sarg.conf as I wish to get reports.

# sarg.conf
#
# TAG: language
# Available languages:
# Bulgarian_windows1251
# Catalan
# Czech
# Czech_UTF8
# Dutch
# English
# French
# German
# Greek
# Hungarian
# Indonesian
# Italian
# Japanese
# Latvian
# Polish
# Portuguese
# Romanian
# Russian_koi8
# Russian_UFT-8
# Russian_windows1251
# Serbian
# Slovak
# Spanish
# Turkish
#
#language English

# TAG: access_log file
# Where is the access.log file
# sarg -l file
#
#access_log /usr/local/squid/var/logs/access.log

# TAG: graphs yes|no
# Use graphics where is possible.
# graph_days_bytes_bar_color blue|green|yellow|orange|brown|red
#
#graphs yes
#graph_days_bytes_bar_color orange

# TAG: title
# Especify the title for html page.
#
#title "Squid User Access Reports"

# TAG: font_face
# Especify the font for html page.
#
#font_face Tahoma,Verdana,Arial

# TAG: header_color
# Especify the header color
#
#header_color darkblue

# TAG: header_bgcolor
# Especify the header bgcolor
#
#header_bgcolor blanchedalmond

# TAG: font_size
# Especify the text font size
#
#font_size 9px

# TAG: header_font_size
# Especify the header font size
#
#header_font_size 9px

# TAG: title_font_size
# Especify the title font size
#
#title_font_size 11px

# TAG: background_color
# TAG: background_color
# Html page background color
#
# background_color white

# TAG: text_color
# Html page text color
#
#text_color #000000

# TAG: text_bgcolor
# Html page text background color
#
#text_bgcolor lavender

# TAG: title_color
# Html page title color
#
#title_color green

# TAG: logo_image
# Html page logo.
#
#logo_image none

# TAG: logo_text
# Html page logo text.
#
#logo_text ""

# TAG: logo_text_color
# Html page logo texti color.
#
#logo_text_color #000000

# TAG: logo_image_size
# Html page logo image size.
# width height
#
#image_size 80 45

# TAG: background_image
# Html page background image
#
#background_image none

# TAG: password
# User password file used by Squid authentication scheme
# If used, generate reports just for that users.
#
#password none

# TAG: temporary_dir
# Temporary directory name for work files
# sarg -w dir
#
#temporary_dir /tmp

# TAG: output_dir
# The reports will be saved in that directory
# sarg -o dir
#
#output_dir /var/www/html/squid-reports

# TAG: output_email
# Email address to send the reports. If you use this tag, no html reports will be generated.
# sarg -e email
#
#output_email none

# TAG: resolve_ip yes/no
# Convert ip address to dns name
# sarg -n
#resolve_ip no

# TAG: user_ip yes/no
# Use Ip Address instead userid in reports.
# sarg -p
#user_ip no

# TAG: topuser_sort_field field normal/reverse
# Sort field for the Topuser Report.
# Allowed fields: USER CONNECT BYTES TIME
#
#topuser_sort_field BYTES reverse

# TAG: user_sort_field field normal/reverse
# Sort field for the User Report.
# Allowed fields: SITE CONNECT BYTES TIME
#
#user_sort_field BYTES reverse

# TAG: exclude_users file
# users within the file will be excluded from reports.
# you can use indexonly to have only index.html file.
#
#exclude_users none

# TAG: exclude_hosts file
# Hosts, domains or subnets will be excluded from reports.
#
# Eg.: 192.168.10.10 - exclude ip address only
# 192.168.10.0 - exclude full C class
# s1.acme.foo - exclude hostname only
# acme.foo - exclude full domain name
#
#exclude_hosts none

# TAG: useragent_log file
# useragent.log file patch to generate useragent report.
#
#useragent_log none

# TAG: date_format
# Date format in reports: e (European=dd/mm/yy), u (American=mm/dd/yy), w (Weekly=yy.ww)
#
#date_format u

# TAG: per_user_limit file MB
# Saves userid on file if download exceed n MB.
# This option allow you to disable user access if user exceed a download limit.
#
#per_user_limit none

# TAG: lastlog n
# How many reports files must be keept in reports directory.
# The oldest report file will be automatically removed.
# 0 - no limit.
#
#lastlog 0

# TAG: remove_temp_files yes
# Remove temporary files: geral, usuarios, top, periodo from root report directory.
#
#remove_temp_files yes

# TAG: index yes|no|only
# Generate the main index.html.
# only - generate only the main index.html
#
#index yes

# TAG: index_tree date|file
# How to generate the index.
#
#index_tree file

# TAG: overwrite_report yes|no
# yes - if report date already exist then will be overwrited.
# no - if report date already exist then will be renamed to filename.n, filename.n+1
#
#overwrite_report no

# TAG: records_without_userid ignore|ip|everybody
# What can I do with records without user id (no authentication) in access.log file ?
#
# ignore - This record will be ignored.
# ip - Use ip address instead. (default)
# everybody - Use "everybody" instead.
#
#records_without_userid ip

# TAG: use_comma no|yes
# Use comma instead point in reports.
# Eg.: use_comma yes => 23,450,110
# use_comma no => 23.450.110
#
#use_comma no

# TAG: mail_utility mail|mailx
# Mail command to use to send reports via SMTP
#
#mail_utility mailx

# TAG: topsites_num n
# How many sites in topsites report.
#
#topsites_num 100

# TAG: topsites_sort_order CONNECT|BYTES A|D
# Sort for topsites report, where A=Ascendent, D=Descendent
#
#topsites_sort_order CONNECT D

# TAG: index_sort_order A/D
# Sort for index.html, where A=Ascendent, D=Descendent
#
#index_sort_order D

# TAG: exclude_codes file
# Ignore records with these codes. Eg.: NONE/400
#
#exclude_codes /usr/local/sarg/exclude_codes

# TAG: replace_index string
# Replace "index.html" in the main index file with this string
# If null "index.html" is used
#
#replace_index

# TAG: max_elapsed milliseconds
# If elapsed time is recorded in log is greater than max_elapsed use 0 for elapsed time.
# Use 0 for no checking
#
#max_elapsed 28800000
# 8 Hours

# TAG: report_type type
# What kind of reports to generate.
# topusers - users, sites, times, bytes, connects, links to accessed sites, etc
# topsites - site, connect and bytes report
# sites_users - users and sites report
# users_sites - accessed sites by the user report
# date_time - bytes used per day and hour report
# denied - denied sites with full URL report
# auth_failures - autentication failures report
# site_user_time_date - sites, dates, times and bytes report
# downloads - downloads per user report
#
# Eg.: report_type topsites denied
#
#report_type topusers topsites sites_users users_sites date_time denied auth_failures site_user_time_date downloads

# TAG: usertab filename
# You can change the "userid" or the "ip address" to be a real user name on the reports.
# Table syntax:
# userid name or ip address name
# Eg:
# SirIsaac Isaac Newton
# vinci Leonardo da Vinci
# 192.168.10.1 Karol Wojtyla
#
# Each line must be terminated with '\n'
#
#usertab none

# TAG: long_url yes|no
# If yes, the full url is showed in report.
# If no, only the site will be showed
#
# YES option generate very big sort files and reports.
#
#long_url no

# TAG: date_time_by bytes|elap
# Date/Time reports will use bytes or elapsed time?
#
#date_time_by elap

# TAG: charset name
# ISO 8859 is a full series of 10 standardized multilingual single-byte coded (8bit)
# graphic character sets for writing in alphabetic languages
# You can use the following charsets:
# Latin1 - West European
# Latin2 - East European
# Latin3 - South European
# Latin4 - North European
# Cyrillic
# Arabic
# Greek
# Hebrew
# Latin5 - Turkish
# Latin6
# Windows-1251
# Japan
# Koi8-r
# UTF-8
#
#charset Latin1

# TAG: user_invalid_char "&/"
# Records that contain invalid characters in userid will be ignored by Sarg.
#
#user_invalid_char "&/"

# TAG: privacy yes|no
# privacy_string "***.***.***.***"
# privacy_string_color blue
# In some countries the sysadm cannot see the visited sites by a restrictive law.
# Using privacy yes the visited url will be changes by privacy_string and the link
# will be removed from reports.
#
#privacy no
#privacy_string "***.***.***.***"
#privacy_string_color blue

# TAG: include_users "user1:user2:...:usern"
# Reports will be generated only for listed users.
#
#include_users none

# TAG: exclude_string "string1:string2:...:stringn"
# Records from access.log file that contain one of listed strings will be ignored.
#
#exclude_string none

# TAG: show_successful_message yes|no
# Shows "Successful report generated on dir" at end of process.
#
#show_successful_message yes

# TAG: show_read_statistics yes|no
# Shows some reading statistics.
#
#show_read_statistics yes

# TAG: topuser_fields
# Which fields must be in Topuser report.
#
#topuser_fields NUM DATE_TIME USERID CONNECT BYTES %BYTES IN-CACHE-OUT USED_TIME MILISEC %TIME TOTAL AVERAGE

# TAG: user_report_fields
# Which fields must be in User report.
#
#user_report_fields CONNECT BYTES %BYTES IN-CACHE-OUT USED_TIME MILISEC %TIME TOTAL AVERAGE

# TAG: bytes_in_sites_users_report yes|no
# Bytes field must be in Site & Users Report ?
#
#bytes_in_sites_users_report no

# TAG: topuser_num n
# How many users in topsites report. 0 = no limit
#
#topuser_num 0

# TAG: site_user_time_date_type list|table
# generate reports for site_user_time_date in list or table format
#
#site_user_time_date_type table

# TAG: datafile file
# Save the report results in a file to populate some database
#
#datafile none

# TAG: datafile_delimiter ";"
# ascii character to use as a field separator in datafile
#
#datafile_delimiter ";"

# TAG: datafile_fields all
# Which data fields must be in datafile
# user;date;time;url;connect;bytes;in_cache;out_cache;elapsed
#
#datafile_fields user;date;time;url;connect;bytes;in_cache;out_cache;elapsed

# TAG: datafile_url ip|name
# Saves the URL as ip or name in datafile
#
#datafile ip

# TAG: weekdays
# The weekdays to take account ( Sunday->0, Saturday->6 )
# Example:
#weekdays 1-3,5
# Default:
#weekdays 0-6

# TAG: hours
# The hours to take account
# Example:
#hours 7-12,14,16,18-20
# Default:
#hours 0-23

# TAG: dansguardian_conf file
# DansGuardian.conf file path
# Generate reports from DansGuardian logs.
# Use 'none' to disable it.
# dansguardian_conf /usr/dansguardian/dansguardian.conf
#
#dansguardian_conf none

# TAG: dansguardian_ignore_date on|off
# 'on' must use the record even the date range is different from the used in squid access.log file.
# 'off' must use the record only if the date range is in the irange used in squid access.log file.
#
#dansguardian_ignore_date off

# TAG: squidguard_conf file
# path to squidGuard.conf file
# Generate reports from SquidGuard logs.
# Use 'none' to disable.
# You can use sarg -L filename to use an alternate squidGuard log.
# squidguard_conf /usr/local/squidGuard/squidGuard.conf
#
#squidguard_conf none

# TAG: squidguard_ignore_date on|off
# Use 'on' use the record even the date range is different from the used squid access.log file.
# Use 'off' use the record only if the date range is in the used squid access.log file.
#
#squidguard_ignore_date off

# TAG: squidguard_log_format
# Format string SquidGuard logs.
# REJIK #year#-#mon#-#day# #hour# #list#:#tmp# #ip# #user# #tmp#/#tmp#/#url#/#end#
# SQUIDGUARD #year#-#mon#-#day# #hour# #tmp#/#list#/#tmp#/#tmp#/#url#/#tmp# #ip#/#tmp# #user# #end#
#squidguard_log_format #year#-#mon#-#day# #hour# #tmp#/#list#/#tmp#/#tmp#/#url#/#tmp# #ip#/#tmp# #user# #end#

# TAG: show_sarg_info yes|no
# shows sarg information and site path on each report bottom
#
#show_sarg_info yes

# TAG: show_sarg_logo yes|no
# shows sarg logo
#
#show_sarg_logo yes

# TAG: parsed_output_log directory
# Saves the processed log in a sarg format after parsing the squid log file.
# This is a way to dump all of the data structures out, after parsing from
# the logs (presumably this data will be much smaller than the log files themselves),
# and pull them back in for later processing and merging with data from previous logs.
#
#parsed_output_log none

# TAG: parsed_output_log_compress /bin/gzip|/usr/bin/bzip2|nocompress
# sarg logs compress util
#
#parsed_output_log_compress /bin/gzip

# TAG: displayed_values bytes|abbreviation
# how the values will be displayed in reports.
# eg. bytes - 209.526
# abbreviation - 210K
#
#displayed_values bytes

# Report limits
# TAG: authfail_report_limit n
# TAG: denied_report_limit n
# TAG: siteusers_report_limit n
# TAG: squidguard_report_limit n
# TAG: user_report_limit n
# TAG: dansguardian_report_limit n
# TAG: download_report_limit n
# report limits (lines).
# '0' no limit
#
#authfail_report_limit 10
#denied_report_limit 10
#siteusers_report_limit 0
#squidguard_report_limit 10
#dansguardian_report_limit 10
#user_report_limit 10
#user_report_limit 50

# TAG: www_document_root dir
# Where is your Web DocumentRoot
# Sarg will create sarg-php directory with some PHP modules:
# - sarg-squidguard-block.php - add urls from user reports to squidGuard DB
#
#www_document_root /var/www/html

# TAG: block_it module_url
# This tag allow you to pass urls from user reports to a cgi or php module,
# to be blocked by some Squid acl
#
# Eg.: block_it /sarg-php/sarg-block-it.php
# sarg-block-it is a php that will append a url to a flat file.
# You must change /var/www/html/sarg-php/sarg-block-it to point to your file
# in $filename variable, and chown to a httpd owner.
#
# sarg will pass http://module_url?url=url
#
#block_it none

# TAG: external_css_file path
# This tag allow internal sarg css override.
# Sarg use theses style classes:
# .body body class
# .info sarg information class, align=center
# .title title class, align=center
# .header header class, align:left
# .header2 header class, align:right
# .header3 header class, align:right
# .text text class, align:left
# .data table text class, align:right
# .data2 table text class, align:right, border colors
# .link link class
#
# There is a sample in /usr/local/sarg/etc/css.tpl
#
#external_css_file none

# TAG: user_authentication yes|no
# Allow user authentication in User Reports using .htaccess
# Parameters:
# AuthUserFile - where the user password file is
# AuthName - authentication realm. Eg "Members Only"
# AuthType - authenticaion type - basic
# Require - authorized users to see the report.
# %u - user report
#
# user_authentication no
# AuthUserFile /usr/local/sarg/passwd
# AuthName "SARG, Restricted Access"
# AuthType Basic
# Require user admin %u

# TAG: download_suffix "suffix,suffix,...,suffix"
# file suffix to be considered as "download" in Download report.
# Use 'none' to disable.
#
#download_suffix "zip,arj,bzip,gz,ace,doc,iso,adt,bin,cab,com,dot,drv$,lha,lzh,mdb,mso,ppt,rtf,src,shs,sys,exe,dll,mp3,avi,mpg,mpeg"

# TAG: ulimit n
# The maximum number of open file descriptors to avoid "Too many open files" error message.
# You need to run sarg as root to use ulimit tag.
# If you run sarg with a low privilege user, set to 'none' to disable ulimit
#
#ulimit 20000

# TAG: ntlm_user_format username|domainname+username
# NTLM users format.
#
#ntlm_user_format domainname+username

# TAG: realtime_refresh_time num sec
# How many time to auto refresh the realtime report
# 0 = disable
#
# realtime_refresh_time 3

# TAG: realtime_access_log_lines num
# How many last lines to get from access.log file
#
# realtime_access_log_lines 1000

# TAG: realtime_types: GET,PUT,CONNECT,ICP_QUERY,POST
# Which records must be in realtime report.
#
# realtime_types GET,PUT,CONNECT

# TAG: realtime_unauthenticated_records: ignore|show
# What to do with unauthenticated records in realtime report.
#
# realtime_unauthenticated_records: show

# TAG: byte_cost value no_cost_limit
# Cost per byte.
# Eg. byte_cost 0.01 100000000
# per byte cost = 0.01
# bytes with no cost = 100 Mb
# 0 = disable
#
# byte_cost 0.01 50000000

# TAG: squid24 on|off
# Compatilibity with squid version <= 2.4 when using emulate_http_log on
#
# squid24 off


Sarg-reports (this file) is a simple bash script written to automate the SARG (a powerful squid log analyzer) reports and log management.Sarg it self, provide to end user a generic interface to create reports based on squid access log (begin of log to current date).sarg-reports (this script) is useful because it allow you to easly create and manage Daily, Weekly and Monthly reports.

No comments: