Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ target/
Unused/
.idea/
POP/
.ipynb_checkpoints/
*.ipynb
42 changes: 21 additions & 21 deletions demo/Drain_demo.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#!/usr/bin/env python
"""Demo script: run the Drain log parser on the HDFS_2k.log sample."""
import sys
sys.path.append('../')
from logparser import Drain

# Input directory of the log file, output directory of the parsing results,
# and the name of the log file to parse.
source_dir = '../logs/HDFS/'
result_dir = 'Drain_result/'
log_name = 'HDFS_2k.log'

# HDFS log format
hdfs_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'

# Regular expressions for optional preprocessing (default: [])
block_id = r'blk_(|-)[0-9]+'
ip_address = r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)'
number = r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$'

options = dict(
    indir=source_dir,
    outdir=result_dir,
    depth=4,  # Depth of all leaf nodes
    st=0.5,   # Similarity threshold
    rex=[block_id, ip_address, number],
)

Drain.LogParser(hdfs_format, **options).parse(log_name)
#!/usr/bin/env python
"""Demo script: run the Drain log parser on the HDFS_2k.log sample.

The repository root (for importing ``logparser``) and the input log
directory are resolved relative to this file instead of the current working
directory, so the demo can be launched from any directory.  Parsing results
are still written to ``Drain_result/`` under the current working directory.
"""
import os
import sys

try:
    # Directory that contains this script; anchors the relative paths below.
    demo_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is undefined when the code is pasted into an interactive
    # session; fall back to the working directory (the previous behavior).
    demo_dir = os.getcwd()

# Make the ``logparser`` package in the parent directory importable.
sys.path.append(os.path.join(demo_dir, '..'))
from logparser import Drain

# The trailing '' keeps the trailing path separator the original value had.
input_dir = os.path.join(demo_dir, '..', 'logs', 'HDFS', '')  # The input directory of log file
output_dir = 'Drain_result/'  # The output directory of parsing results
log_file = 'HDFS_2k.log'  # The input log file name
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
# Regular expression list for optional preprocessing (default: [])
regex = [
    r'blk_(|-)[0-9]+',  # block id
    r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)',  # IP
    r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$',  # Numbers
]
st = 0.5  # Similarity threshold
depth = 4  # Depth of all leaf nodes

parser = Drain.LogParser(log_format, indir=input_dir, outdir=output_dir, depth=depth, st=st, rex=regex)
parser.parse(log_file)

8 changes: 4 additions & 4 deletions logparser/Drain/Drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def outputResult(self, logClustL):

def printTree(self, node, dep):
pStr = ''
for i in xrange(dep):
for i in range(dep):
pStr += '\t'

if node.depth == 0:
Expand All @@ -234,7 +234,7 @@ def printTree(self, node, dep):
else:
pStr += node.digitOrtoken

print pStr
print(pStr)

if node.depth == self.depth:
return 1
Expand Down Expand Up @@ -273,7 +273,7 @@ def parse(self, logName):

count += 1
if count % 1000 == 0 or count == len(self.df_log):
print 'Processed {0:.1f}% of log lines.'.format(count * 100.0 / len(self.df_log))
print('Processed {0:.1f}% of log lines.'.format(count * 100.0 / len(self.df_log)))


if not os.path.exists(self.savePath):
Expand Down Expand Up @@ -320,7 +320,7 @@ def generate_logformat_regex(self, logformat):
regex = ''
for k in range(len(splitters)):
if k % 2 == 0:
splitter = re.sub(' +', '\s+', splitters[k])
splitter = re.sub(' +', '\\\s+', splitters[k])
regex += splitter
else:
header = splitters[k].strip('<').strip('>')
Expand Down
2 changes: 1 addition & 1 deletion logparser/Drain/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from Drain import *
from .Drain import *