#!/usr/bin/python3

'''
httop, HTTP protocol sniffer, summarizer, ...

(c) 2009-2010,2019 Jan ONDREJ (SAL) <ondrejj(at)salstar.sk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

'''

import sys, re, time, pcapy, getopt, curses

TIME_DELTA = 5

class unknown:
    '''Generic wrapper around a raw payload with no recognised header.

    Subclasses override data_offset() to say where their own header ends.
    '''
    type = 'unknown'

    def __init__(self, payload):
        self.payload = payload

    def data_offset(self):
        '''Return the number of leading header bytes (none for raw data).'''
        return 0

    def data(self):
        '''Return the payload with the leading header bytes stripped.'''
        start = self.data_offset()
        return self.payload[start:]

class tcp(unknown):
    '''TCP segment decoder; `payload` starts at the first TCP header byte.

    Every byte is read through a one-element slice, so the methods work on
    Python 3 `bytes` (where plain indexing yields an int that ord() rejects)
    as well as on one-character-per-byte strings.
    '''
    type = 'tcp'

    def _byte(self, index):
        '''Return the header byte at `index` as an integer.'''
        return ord(self.payload[index:index + 1])

    def _u16(self, offset):
        '''Return the big-endian 16-bit value starting at `offset`.'''
        return (self._byte(offset) << 8) + self._byte(offset + 1)

    def src(self):
        '''Return the source port (header bytes 0-1).'''
        return self._u16(0)

    def dst(self):
        '''Return the destination port (header bytes 2-3).'''
        return self._u16(2)

    def data_offset(self):
        '''Return the TCP header length in bytes.

        The upper nibble of byte 12 is the header length in 32-bit words;
        masking it and shifting right by 2 converts it straight to bytes.
        '''
        return (self._byte(12) & 0xf0) >> 2

    def len(self):
        '''Return the number of payload bytes that follow the TCP header.'''
        return len(self.payload) - self.data_offset()

    def is_fin(self):
        '''Return True when the lowest bit (FIN) of flags byte 13 is set.'''
        return (self._byte(13) & 1) == 1

class ipv4:
    '''IPv4 header decoder for a captured link-layer frame.

    The constructor drops the first 14 bytes of the frame (the size of an
    Ethernet II header), so `self.payload` starts at the IPv4 header.
    '''

    def __init__(self, payload):
        self.payload = payload[14:]

    def _octets(self, start, stop):
        '''Return header bytes [start:stop] as a list of integers.

        One-element slices are used because iterating `bytes` on Python 3
        yields ints, which ord() rejects; slicing works for bytes and str.
        A truncated header simply yields a shorter list.
        '''
        chunk = self.payload[start:stop]
        return [ord(chunk[i:i + 1]) for i in range(len(chunk))]

    def ihl(self):
        '''Return the IP header length in bytes (low nibble of byte 0, x4).'''
        return (ord(self.payload[0:1]) & 0x0f) << 2

    def protocol(self):
        '''Return the protocol number stored in header byte 9.'''
        return ord(self.payload[9:10])

    def src(self):
        '''Return the source address as a list of four integers.'''
        return self._octets(12, 16)

    def dst(self):
        '''Return the destination address as a list of four integers.'''
        return self._octets(16, 20)

    def as_string(self, addr):
        '''Return `addr` (an iterable of octets) in dotted notation.'''
        return '.'.join([str(x) for x in addr])

    def ssrc(self):
        '''Return the source address as a dotted string.'''
        return self.as_string(self.src())

    def sdst(self):
        '''Return the destination address as a dotted string.'''
        return self.as_string(self.dst())

    def data(self):
        '''Return the bytes after the IP header wrapped in a decoder object.

        Protocol 6 is wrapped in `tcp`; anything else in `unknown`.
        '''
        body = self.payload[self.ihl():]
        if self.protocol() == 6:  # TCP
            return tcp(body)
        return unknown(body)

class http(unknown):
    '''Parser for the first packet of an HTTP/1.x request.

    `payload` may be `bytes` (as captured) or already-decoded `str`. Bytes
    are decoded as latin-1, which maps every byte to exactly one character
    and therefore never fails, before the text patterns below are applied.
    '''
    # Request line, e.g. "GET /index.html HTTP/1.1"; group 1 is the path.
    reg_header = re.compile(r'^[A-Z]+ ([^ \r\n]*) HTTP/1\.[01]\r?$', re.M)
    # "Host:" header, matched case-insensitively; group 1 is its value.
    reg_host = re.compile(r'^Host: *([^\r\n]*)\r?$', re.M | re.I)

    def _text(self):
        '''Return the payload as text, decoding it when it is bytes.'''
        payload = self.payload
        if isinstance(payload, bytes):
            return payload.decode('latin-1')
        return payload

    def path(self):
        '''Return the request path, or None if no request line is found.'''
        header = self.reg_header.search(self._text())
        if header:
            return header.group(1)
        return None

    def host(self):
        '''Return the Host header value, or None if the header is absent.'''
        host = self.reg_host.search(self._text())
        if host:
            return host.group(1)
        return None

    def host_path(self):
        '''Return the (host, path) pair; either element may be None.'''
        return self.host(), self.path()

    def calc(self):
        '''Cache host and path as `hostname` / `pathname`; return self.'''
        self.hostname = self.host()
        self.pathname = self.path()
        return self
def si(key, unit='B', delimeter=' '):
    '''Format a number with a 1024-based magnitude prefix.

    key       -- the value to format; it is divided by 1024 until it drops
                 below 1024 or the largest prefix ('Y') is reached.
    unit      -- unit text appended after the prefix.
    delimeter -- text placed between the number and the prefix.

    Returns a string such as "   1 kB". The number is truncated to an
    integer and right-aligned in 4 columns; the prefix always occupies at
    least one column, so a value without prefix shows a blank there.
    '''
    fix = ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']
    counter = 0
    # Stop at the last prefix so fix[counter] can never run off the list.
    while key >= 1024 and counter < len(fix) - 1:
        key /= 1024
        counter += 1
    return "%4d%s%1s%s" % (key, delimeter, fix[counter], unit)

class data_counter:
    '''Occurrence counter keyed by arbitrary hashable values.'''

    def __init__(self):
        self.data = {}

    def add(self, key):
        '''Increase the count stored for `key` by one.'''
        self.data[key] = self.data.get(key, 0) + 1

    def __call__(self, **kw):
        '''Return (key, count) pairs ordered by count.

        Keyword arguments (for example reverse=True) are handed to the sort.
        '''
        pairs = list(self.data.items())
        pairs.sort(key=lambda pair: pair[1], **kw)
        return pairs

class ps_counter:
    ''' Per second counter

    Keeps `lenght` slots of accumulated amounts together with the time at
    which each slot was opened. Slot 0 is the current one; roll() opens a
    new slot and discards the oldest.
    '''

    def __init__(self, lenght=60):
        t0 = time.time()
        self.data = [0] * lenght
        self.times = [t0] * lenght

    def add(self, amount=1):
        '''Add `amount` to the current slot.'''
        self.data[0] += amount

    def roll(self):
        '''Open a fresh current slot stamped with the present time.'''
        self.data = [0] + self.data[:-1]
        self.times = [time.time()] + self.times[:-1]

    def __call__(self, range=6):
        '''Return the average amount per second over the newest slots.

        range -- how many slots to average over; fractional values are
                 truncated and the result is clamped to 1..number of slots
                 so that a window can neither be empty nor run off the end.

        Returns 0.0 when no time has elapsed since the oldest slot of the
        window was opened (or the clock stepped backwards), instead of
        dividing by zero or reporting a negative rate.
        '''
        slots = min(max(int(range), 1), len(self.data))
        delta = time.time() - self.times[slots - 1]
        if delta <= 0:
            return 0.0
        return sum(self.data[:slots]) / delta

class connection_counter:
    '''Per-connection byte and packet accounting.

    Four dictionaries share the keys produced by genkey():
      urls        -- URL seen on the connection, or "?" when none was seen
      last_update -- time.time() of the most recent counted packet
      bytes       -- accumulated byte count
      packets     -- number of counted packets
    `closed` collects keys that cleanup() should drop on its next run.
    '''

    def __init__(self):
        self.urls = {}
        self.last_update = {}
        self.bytes = {}
        self.packets = {}
        self.closed = []

    def __call__(self, **kw):
        '''Return connection keys ordered by byte count.

        Keyword arguments (for example reverse=True) go to sorted().
        '''
        return sorted(self.packets.keys(), key=lambda x: self.bytes[x], **kw)

    def genkey(self, shost, sport, dhost, dport):
        '''Return the dictionary key for a packet.

        When either side uses port 80 the key is the *other* endpoint
        ("host:port"), so both directions of the connection share one
        entry. Otherwise the key is the directional "src:port-dst:port".
        '''
        if sport == 80:
            return "%s:%s" % (dhost, dport)
        elif dport == 80:
            return "%s:%s" % (shost, sport)
        else:
            return "%s:%d-%s:%d" % (shost, sport, dhost, dport)

    def add(self, bytes, shost, sport, dhost, dport, url=None):
        '''Account one packet carrying `bytes` payload bytes.

        A packet with `url` records that URL for the connection. A packet
        without URL and without payload is ignored entirely.
        '''
        key = self.genkey(shost, sport, dhost, dport)
        if url:
            self.urls[key] = url
        elif bytes == 0:
            return
        elif key not in self.urls:
            self.urls[key] = "?"
        if key in self.packets:
            self.bytes[key] += bytes
            self.packets[key] += 1
        else:
            self.bytes[key] = bytes
            self.packets[key] = 1
        self.last_update[key] = time.time()

    def b(self, key):
        '''Return the byte count of `key` formatted by si().'''
        return si(self.bytes[key])

    def close(self, shost, sport, dhost, dport):
        '''Mark the packet's connection for removal by the next cleanup().

        The key is built with genkey() so that it matches the key add()
        stored the connection under, including the port-80 short form.
        '''
        key = self.genkey(shost, sport, dhost, dport)
        if key in self.packets:
            self.closed.append(key)

    def _forget(self, key):
        '''Drop `key` from every table; missing entries are ignored.'''
        for table in (self.last_update, self.bytes, self.packets, self.urls):
            table.pop(key, None)

    def cleanup(self, delta=30):
        '''Clean all data older than "delta" seconds.

        Connections marked by close() are removed first; for directional
        "a-b" keys the opposite direction "b-a" is removed as well.
        '''
        for key in self.closed:
            self._forget(key)
            if "-" in key:
                left, right = key.split("-", 1)
                self._forget("%s-%s" % (right, left))
        self.closed = []
        old = time.time() - delta
        # Collect the expired keys first: removing entries from a dict
        # while iterating over it raises RuntimeError.
        expired = [key for key, stamp in self.last_update.items()
                   if stamp < old]
        for key in expired:
            self._forget(key)

def summary():
    '''Return the one-line status text: request rate, upload and download.

    Upload and download rates are averaged over roughly the last 30
    seconds, expressed as a number of TIME_DELTA-long counter slots. At
    least one slot is always used, so a refresh delay above 30 seconds
    still reports the current slot instead of an empty window.
    '''
    window = max(1, 30 // TIME_DELTA)
    return "%9.2f request/s   |   UPLOAD: %9s   |   DOWNLOAD: %9s" % (
        rps(),
        si(data_upload(window), 'B/s'),
        si(data_download(window), 'B/s'),
    )

# Usage text printed for --help / -h.
HELP='''\
httop.py, (c) 2009 Jan ONDREJ (SAL) <ondrejj(at)salstar.sk>
Licensed under GPLv2+

Usage: httop.py [options]

  --help | -h		This help.
  --interface=X		Set sniffing interface to X.
  --delay=X		Set delay between screen refresh to X seconds.
  --match=X		Search for regular expression X in URL path.
  --src=X		Search for regular expression X in source IP address.
  --urls		Show URL summary.
  			By default virtualhost and IP summary is displayed.
  --bytes		Show bytes summary.
'''

# ---------------------------------------------------------------------------
# Script body: parse options, set up curses and the capture, then loop
# forever alternating between a capture window of TIME_DELTA seconds and a
# screen refresh.
# ---------------------------------------------------------------------------
opts, args = getopt.gnu_getopt(sys.argv[1:], 'h', [
    'interface=', 'delay=', 'match=', 'src=', 'urls', 'bytes', 'help'
])
opts = dict(opts)
if "--help" in opts or "-h" in opts:
    print(HELP)
    sys.exit(0)

# ncurses: from here on the terminal is in curses mode, so everything else
# runs inside try/finally to guarantee curses.endwin() restores it, even
# when setup (bad regular expression, capture cannot be opened, ...) fails.
stdscr = curses.initscr()
try:
    stdscr.clear()
    maxy, maxx = stdscr.getmaxyx()
    HEIGHT = maxy - 1
    WIDTH = maxx
    win = curses.newwin(0, 0)
    summary_win = curses.newpad(1, maxx)
    data_win = curses.newpad(maxy - 1, maxx)
    summary_win.addstr(0, 0, "Collecting data, please wait ...")
    data_win.addstr(5, 0, "\n")

    CHECK_BYTES = "--bytes" in opts
    if "--delay" in opts:
        # A delay below one second would leave no capture window and makes
        # summary() divide by zero, so clamp it.
        TIME_DELTA = max(1, int(opts['--delay']))
    if "--interface" in opts:
        INTERFACE = opts['--interface']
    else:
        INTERFACE = pcapy.findalldevs()[0]
        data_win.addstr("Interface: %s\n\n" % INTERFACE)
    if '--match' in opts:
        URL_MATCH = re.compile(opts['--match'])
    else:
        URL_MATCH = None
    if '--src' in opts:
        SRC_MATCH = re.compile(opts['--src'])
    else:
        SRC_MATCH = None
    # Any positional arguments form the capture filter, including a single
    # quoted expression such as "tcp port 8080".
    if args:
        filter = ' '.join(args)
    else:
        filter = "tcp port 80"
    data_win.addstr("Filter: %s\n\n" % filter)
    data_win.refresh(0, 0, 1, 0, maxy - 1, maxx)
    cap = pcapy.open_live(INTERFACE, 1500, 0, 100)
    # From here on `filter` is the compiled filter object, not the text.
    filter = pcapy.compile(cap.datalink(), 1500, filter, 0, 24)
    empty = [('', '')] * 30

    connections = connection_counter()
    rps = ps_counter()
    data_download = ps_counter()
    data_upload = ps_counter()

    while True:
        # zero & cleanup counters
        sum_ip = data_counter()
        sum_host = data_counter()
        sum_path = data_counter()
        sum_urls = data_counter()
        rps.roll()
        data_download.roll()
        data_upload.roll()
        connections.cleanup()
        # run for delta time
        t0 = time.time() + TIME_DELTA
        while time.time() < t0:
            # Remaining seconds of this capture window, top-right corner.
            summary_win.addstr(0, maxx - 3, "%2d" % int(t0 - time.time()))
            summary_win.refresh(0, 0, 0, 0, 0, maxx)
            try:
                header, payload = cap.next()
            except pcapy.PcapError:
                continue
            # NOTE(review): presumably cap.next() hands back an empty packet
            # when the read times out - confirm against the pcapy docs.
            if not payload or filter.filter(payload) == 0:
                continue
            dg = ipv4(payload)
            tcpdg = dg.data()
            if tcpdg.type != "tcp":
                continue
            data = tcpdg.data()
            # Captured data is bytes on Python 3; comparing it with text
            # never matches, so decode it first. latin-1 maps every byte to
            # one character and cannot fail.
            if isinstance(data, bytes):
                data = data.decode('latin-1')
            if data.startswith(('GET ', 'POST ')):
                # http protocol first packet
                http_data = http(data).calc()
                if URL_MATCH and not URL_MATCH.search(http_data.pathname or ''):
                    continue
                if SRC_MATCH and not SRC_MATCH.search(dg.ssrc()):
                    continue
                rps.add()
                sum_host.add(http_data.hostname)
                sum_path.add(http_data.pathname)
                sum_ip.add(dg.ssrc())
                if http_data.hostname and http_data.pathname:
                    sum_urls.add(http_data.hostname + http_data.pathname)
                    if CHECK_BYTES:
                        connections.add(
                            tcpdg.len(),
                            dg.ssrc(), tcpdg.src(), dg.sdst(), tcpdg.dst(),
                            http_data.hostname + http_data.pathname)
            elif CHECK_BYTES:
                connections.add(
                    tcpdg.len(),
                    dg.ssrc(), tcpdg.src(), dg.sdst(), tcpdg.dst())
            if tcpdg.is_fin():
                # FIN received, clean byte counter
                connections.close(
                    dg.ssrc(), tcpdg.src(), dg.sdst(), tcpdg.dst())
            # Packets sent from port 80 are counted as upload, all others
            # as download.
            if tcpdg.src() == 80:
                data_upload.add(tcpdg.len())
            else:
                data_download.add(tcpdg.len())

        # redraw the screen with the data of the finished capture window
        all_hosts = sum_host(reverse=True) + empty
        all_ips = sum_ip(reverse=True) + empty
        summary_win.clear()
        data_win.clear()
        summary_win.addnstr(0, 0, summary(), WIDTH)
        if "--urls" in opts:
            url_rows = (sum_urls(reverse=True) + empty)[:HEIGHT]
            for lineno, row in enumerate(url_rows):
                data_win.addnstr(
                    lineno, 0,
                    "%4s %s" % (row[1], row[0][:WIDTH-6]),
                    WIDTH)
        elif CHECK_BYTES:
            # Rows are connection keys; padding them with the ('', '')
            # placeholders would make connections.b() fail on an unknown
            # key, and the window was cleared above anyway.
            for lineno, row in enumerate(connections(reverse=True)[:HEIGHT]):
                data_win.addnstr(
                    lineno, 0,
                    "%7s %21s %s"
                    % (connections.b(row), row,
                       connections.urls.get(row, '?')[-(WIDTH-31):]),
                    WIDTH)
        else:
            for lineno in range(HEIGHT):
                if lineno < len(all_hosts):
                    row1 = all_hosts[lineno]
                else:
                    row1 = ('', '')
                if lineno < len(all_ips):
                    row2 = all_ips[lineno]
                else:
                    row2 = ('', '')
                data_win.addnstr(
                    lineno, 0,
                    "%33s %4s            %16s %4s"
                    % (row1[0], row1[1], row2[0], row2[1]),
                    WIDTH)
        summary_win.refresh(0, 0, 0, 0, 0, maxx)
        data_win.refresh(0, 0, 1, 0, maxy - 1, maxx)
except KeyboardInterrupt:
    # Ctrl-C is the normal way to leave the program.
    pass
finally:
    curses.endwin()
