HiiHahWIKI - making some notes for... -
Python:apacheアクセスログをsqlite3にインポートしてみた
をテンプレートにして作成
開始行:
*Python:apacheアクセスログをsqlite3にインポートしてみた ...
Pythonのcsvモジュールを使用すれば、apacheのアクセスログは...
調べて作ったスクリプトをベタ貼り。
ちなみに、元情報はここです。「lolloo-htn」さん、すてきな...
-http://d.hatena.ne.jp/lolloo-htn/20090130/1233327457
して、そのままではなくて、ところどころ独自処理を入れてま...
-ログのフォーマットがデフォルトではなかったり。
-sqlite3ではlike分で正規表現使えないため、事前に加工して...
import csv, sqlite3, datetime, sys, re
class ApacheDialect(csv.Dialect):
delimiter = " "
doublequote = True
lineterminator="\r\n"
quotechar='"'
quoting=0
skipinitialspace=False
class ApacheLog:
_DETAILPATTERN_ = """^/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a...
_DETAILFMT_ = re.compile(_DETAILPATTERN_)
def __init__(self,line):
self.log={}
self.log['ip'] = line[0]
self.log['remotelog'] = line[1]
self.log['remoteuser'] = line[2]
self.log['time_hour'] = self._timeparser(line[3]...
self.log['time_minute'] = self._timeparser(line[...
self.log['time_second'] = self._timeparser(line[...
self.log['method'] = line[5].split()[0]
self.log['url'] = line[5].split()[1]
self.log['location'] = line[5].split()[1].split(...
self.log['status'] = line[6]
self.log['bytes'] = line[7]
self.log['referer'] = line[8]
self.log['ua'] = line[9]
self.log['xforwardedproto'] = line[10]
self.log['remotehost'] = line[11]
self.log['duration'] = line[12]
self.log['isdetailpage'] = self._is_detail_page(...
def __getattr__(self, attr):
return self.log[attr]
def _timeparser(self,str,flg=3):
t = datetime.datetime.strptime(str ,"[%d/%b/%Y:%...
if flg == 1:
return t.strftime("%Y%m%d%H")
elif flg == 2:
return t.strftime("%Y%m%d%H%M")
else:
return t.strftime("%Y%m%d%H%M%S")
def _is_detail_page(self, str):
if self._DETAILFMT_.search(str):
return True
return False
if __name__ == '__main__':
argvs = sys.argv
if len(argvs) != 3:
print "usage : python accesslog2sqlite.py [acces...
print "example : python accesslog2sqlite.py acce...
exit(255)
logfile = argvs[1]
dbfile = argvs[2]
ddl = ''' create table if not exists access_log (
ip text,
remotelog text,
remoteuser text,
time_hour integer,
time_minute integer,
time_second integer,
method text,
url text,
location text,
status text,
bytes integer,
referer text,
ua text,
xforwardedproto text,
remotehost teger,
duration integer,
isdetailpage text
)
'''
csv.register_dialect("apache",ApacheDialect)
reader = csv.reader(open(logfile),dialect="apache")
c = sqlite3.connect(dbfile)
c.execute(ddl)
for aline in reader:
if len(aline) != 13:
continue
l = ApacheLog(aline)
# print l.log
t = (l.ip, l.remotelog, l.remoteuser, l.time_hou...
c.execute("insert into access_log values (?,?,?,...
c.commit()
c.close()
***突っ込んだあとの処理 [#a444556d]
こんなSQLを作ってみました。
これでババっと集計できますw
■shuukei.sql
.separator \t
.output ./01_by_hour.txt
select time_hour, count(time_hour) from access_log group...
.output ./02_by_minute.txt
select time_minute , count(time_minute) from access_log ...
.output ./03_by_source_ip.txt
select ip, count(ip) from access_log group by ip order b...
.output ./04_by_useragent.txt
select "IE*" ,count(ip) from access_log where ua like '%...
select "IE7" ,count(ip) from access_log where ua like '%...
select "IE8" ,count(ip) from access_log where ua like '%...
select "IE9" ,count(ip) from access_log where ua like '%...
select "FireFox" ,count(ip) from access_log where ua lik...
select "Chrome" ,count(ip) from access_log where ua like...
select "iPad" ,count(ip) from access_log where ua like '...
select "iPhone" ,count(ip) from access_log where ua like...
select "Android*" ,count(ip) from access_log where ua li...
select "Android2*" ,count(ip) from access_log where ua l...
select "Android3*" ,count(ip) from access_log where ua l...
select "Android4*" ,count(ip) from access_log where ua l...
.output ./05_by_external_referer.txt
select referer, count(referer) from access_log where ref...
***参考サイト [#n5a919ea]
apacheのアクセスログの公式サイト
-http://httpd.apache.org/docs/2.2/ja/mod/mod_log_config.h...
python2.6のcsvモジュール
-http://docs.python.jp/2.6/library/csv.html
終了行:
*Python:apacheアクセスログをsqlite3にインポートしてみた ...
Pythonのcsvモジュールを使用すれば、apacheのアクセスログは...
調べて作ったスクリプトをベタ貼り。
ちなみに、元情報はここです。「lolloo-htn」さん、すてきな...
-http://d.hatena.ne.jp/lolloo-htn/20090130/1233327457
して、そのままではなくて、ところどころ独自処理を入れてま...
-ログのフォーマットがデフォルトではなかったり。
-sqlite3ではlike分で正規表現使えないため、事前に加工して...
import csv, sqlite3, datetime, sys, re
class ApacheDialect(csv.Dialect):
delimiter = " "
doublequote = True
lineterminator="\r\n"
quotechar='"'
quoting=0
skipinitialspace=False
class ApacheLog:
_DETAILPATTERN_ = """^/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a...
_DETAILFMT_ = re.compile(_DETAILPATTERN_)
def __init__(self,line):
self.log={}
self.log['ip'] = line[0]
self.log['remotelog'] = line[1]
self.log['remoteuser'] = line[2]
self.log['time_hour'] = self._timeparser(line[3]...
self.log['time_minute'] = self._timeparser(line[...
self.log['time_second'] = self._timeparser(line[...
self.log['method'] = line[5].split()[0]
self.log['url'] = line[5].split()[1]
self.log['location'] = line[5].split()[1].split(...
self.log['status'] = line[6]
self.log['bytes'] = line[7]
self.log['referer'] = line[8]
self.log['ua'] = line[9]
self.log['xforwardedproto'] = line[10]
self.log['remotehost'] = line[11]
self.log['duration'] = line[12]
self.log['isdetailpage'] = self._is_detail_page(...
def __getattr__(self, attr):
return self.log[attr]
def _timeparser(self,str,flg=3):
t = datetime.datetime.strptime(str ,"[%d/%b/%Y:%...
if flg == 1:
return t.strftime("%Y%m%d%H")
elif flg == 2:
return t.strftime("%Y%m%d%H%M")
else:
return t.strftime("%Y%m%d%H%M%S")
def _is_detail_page(self, str):
if self._DETAILFMT_.search(str):
return True
return False
if __name__ == '__main__':
argvs = sys.argv
if len(argvs) != 3:
print "usage : python accesslog2sqlite.py [acces...
print "example : python accesslog2sqlite.py acce...
exit(255)
logfile = argvs[1]
dbfile = argvs[2]
ddl = ''' create table if not exists access_log (
ip text,
remotelog text,
remoteuser text,
time_hour integer,
time_minute integer,
time_second integer,
method text,
url text,
location text,
status text,
bytes integer,
referer text,
ua text,
xforwardedproto text,
remotehost teger,
duration integer,
isdetailpage text
)
'''
csv.register_dialect("apache",ApacheDialect)
reader = csv.reader(open(logfile),dialect="apache")
c = sqlite3.connect(dbfile)
c.execute(ddl)
for aline in reader:
if len(aline) != 13:
continue
l = ApacheLog(aline)
# print l.log
t = (l.ip, l.remotelog, l.remoteuser, l.time_hou...
c.execute("insert into access_log values (?,?,?,...
c.commit()
c.close()
***突っ込んだあとの処理 [#a444556d]
こんなSQLを作ってみました。
これでババっと集計できますw
■shuukei.sql
.separator \t
.output ./01_by_hour.txt
select time_hour, count(time_hour) from access_log group...
.output ./02_by_minute.txt
select time_minute , count(time_minute) from access_log ...
.output ./03_by_source_ip.txt
select ip, count(ip) from access_log group by ip order b...
.output ./04_by_useragent.txt
select "IE*" ,count(ip) from access_log where ua like '%...
select "IE7" ,count(ip) from access_log where ua like '%...
select "IE8" ,count(ip) from access_log where ua like '%...
select "IE9" ,count(ip) from access_log where ua like '%...
select "FireFox" ,count(ip) from access_log where ua lik...
select "Chrome" ,count(ip) from access_log where ua like...
select "iPad" ,count(ip) from access_log where ua like '...
select "iPhone" ,count(ip) from access_log where ua like...
select "Android*" ,count(ip) from access_log where ua li...
select "Android2*" ,count(ip) from access_log where ua l...
select "Android3*" ,count(ip) from access_log where ua l...
select "Android4*" ,count(ip) from access_log where ua l...
.output ./05_by_external_referer.txt
select referer, count(referer) from access_log where ref...
***参考サイト [#n5a919ea]
apacheのアクセスログの公式サイト
-http://httpd.apache.org/docs/2.2/ja/mod/mod_log_config.h...
python2.6のcsvモジュール
-http://docs.python.jp/2.6/library/csv.html
ページ名: