Source code for owtf.proxy.cache_handler
"""
owtf.proxy.cache_handler
~~~~~~~~~~~~~~~~~~~~~~~~
Inbound Proxy Module developed by Bharadwaj Machiraju (blog.tunnelshade.in) as a part of Google Summer of Code 2013
"""
import base64
import datetime
import hashlib
import json
import logging
import os
import re
import traceback
import tornado.httputil
from owtf.lib.filelock import FileLock
class CacheHandler(object):
    """Load proxied transactions from, and dump them to, the on-disk cache.

    Used by the request handler. This class:

    * generates the ``request_hash`` that names the cache file
    * manages the file lock held while a missing cache entry is being filled
    * creates the empty ``.rd`` marker file next to every dumped entry
    """

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Remember the collaborators; nothing is read or written here.

        :param cache_dir: Root cache directory
        :param request: HTTP request object of the transaction
        :param cookie_regex: Regex applied to the ``Cookie`` request header
        :param blacklist: When true, text matching ``cookie_regex`` is removed
            from the cookies before hashing; when false, only the matching
            text is kept
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _to_bytes(value):
        """Return *value* as bytes (text is UTF-8 encoded, ``None`` is empty)."""
        if value is None:
            return b''
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def _optional_header(self, name):
        """Return request header *name*, or ``''`` when it is absent."""
        try:
            return self.request.headers[name]
        except KeyError:
            return ''

    def _filtered_cookie_string(self):
        """Return the cookie text that takes part in the request hash."""
        try:
            raw_cookies = self.request.headers['Cookie']
        except KeyError:
            return ''
        if self.blacklist:
            # Drop everything the regex matches, then squeeze out the spaces.
            remaining = re.sub(self.cookie_regex, '', raw_cookies).strip()
            return ''.join(remaining.split(' '))
        # Keep only what the regex matches. findall() yields tuples when the
        # regex has several groups and plain strings otherwise; iterating a
        # plain string character by character gives the same concatenation.
        return ''.join(
            item.strip()
            for match in re.findall(self.cookie_regex, raw_cookies)
            for item in match
            if item
        )

    def calculate_hash(self, callback=None):
        """Compute ``self.request_hash`` and ``self.file_path`` for the request.

        The hash covers the method, URL, version, body, the filtered cookies
        and, when present, the ``User-Agent`` and ``Sec-Websocket-Key``
        headers. The request object itself is not modified.

        :param callback: Optional callable invoked with the request hash
        :type callback: callable or None
        :return: None
        """
        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            self._filtered_cookie_string(),
            # To support proxying of ua-tester
            self._optional_header("User-Agent"),
            # Websocket caching technique
            self._optional_header("Sec-Websocket-Key"),
        ]
        # The body may be bytes while everything else is text, and md5 only
        # accepts bytes, so every part is normalised to bytes before hashing.
        md5_hash = hashlib.md5()  # cache key only, not a security measure
        md5_hash.update(b''.join(self._to_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # Path of the cache file that belongs to this request.
        self.file_path = os.path.join(self.cache_dir, self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Create a proxy response object from the cache file.

        :return: The object built by ``response_from_cache``
        """
        return response_from_cache(os.path.join(self.cache_dir, self.request_hash))

    def dump(self, response):
        """Write the request and response data to the cache file as JSON.

        An empty ``<hash>.rd`` file is created next to the cache file, and the
        file lock stored on the instance (if any) is released even when
        writing fails.

        .. note::
            This is used by transaction logger

        :param response: The proxy response; its ``request_time``, ``code``
            and ``headers`` attributes are read
        :return: None
        """
        try:
            try:
                response_body = self.request.response_buffer.decode("utf-8")
                binary_response = False
            except UnicodeDecodeError:
                # Not valid UTF-8: store base64 text so the JSON stays valid.
                encoded = base64.b64encode(self.request.response_buffer)
                response_body = encoded.decode("ascii")
                binary_response = True
            cache_dict = {
                'request_method': self.request.method,
                'request_url': self.request.url,
                'request_version': self.request.version,
                'request_headers': dict(self.request.headers),
                'request_body': self.request.body.decode('utf-8'),
                'request_time': response.request_time,
                'request_local_timestamp': self.request.local_timestamp.isoformat(),
                'response_code': response.code,
                'response_headers': dict(response.headers),
                'response_body': response_body,
                'response_cookies': response.headers.get_list("Set-Cookie"),
                'binary_response': binary_response
            }
            with open(self.file_path, 'w') as outfile:
                json.dump(cache_dict, outfile)
            # This approach can be used as an alternative for object sharing:
            # an empty file with the hash as name and .rd as extension.
            open('%s.rd' % self.file_path, 'w').close()
        finally:
            # Release the lock on every path so a failed dump cannot leave
            # the cache entry locked.
            file_lock = getattr(self, 'file_lock', None)
            if file_lock is not None:
                file_lock.release()

    def load(self):
        """Return the cached response for the request, or None when not cached.

        Called for every request. When the cache file is missing a file lock
        is created and acquired for it; if the file is still missing afterwards
        None is returned and the lock is left in place (``dump`` releases it).

        :return: Response object loaded from cache, or None
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()
        self.file_lock = FileLock(self.file_path)
        try:
            self.file_lock.acquire()
        # NOTE(review): FileLockTimeoutException is not among the imports
        # visible at the top of this file; confirm where it is defined and
        # import it, otherwise this handler raises NameError when it fires.
        except FileLockTimeoutException:
            logging.debug("Lock could not be acquired %s", traceback.format_exc())
        # For handling race conditions: the entry may have been written while
        # this request was waiting for the lock.
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class DummyObject(object):
    """Bare attribute container used to build fake request/response objects.

    Callers attach whatever attributes they need after construction.
    """

    def __init__(self):
        # Marker attribute set on every instance.
        setattr(self, 'dummy_obj', True)
def response_from_cache(file_path):
    """Build a fake response object from a JSON cache file.

    The returned ``DummyObject`` carries ``code``, ``headers``,
    ``header_string``, ``body``, ``request_time`` and ``cookies``. The body is
    base64-decoded when the cache entry is flagged as a binary response.

    :param file_path: The file path for the cache file
    :type file_path: `str`
    :return: Fake response object populated from the cache entry
    :rtype: `DummyObject`
    """
    # Context manager so the file handle is closed deterministically.
    with open(file_path, 'r') as cache_file:
        cache_dict = json.load(cache_file)

    cached_headers = cache_dict["response_headers"]
    # A temp object is used as an alternative to lists or dictionaries for
    # passing the values around.
    dummy_response = DummyObject()
    dummy_response.code = cache_dict["response_code"]
    dummy_response.headers = tornado.httputil.HTTPHeaders(cached_headers)
    dummy_response.header_string = '\r\n'.join(
        "%s: %s" % (name, value) for name, value in cached_headers.items())
    if cache_dict["binary_response"] is True:
        dummy_response.body = base64.b64decode(cache_dict["response_body"])
    else:
        dummy_response.body = cache_dict["response_body"]
    dummy_response.request_time = cache_dict["request_time"]
    dummy_response.cookies = cache_dict["response_cookies"]
    return dummy_response
def request_from_cache(file_path):
    """Build a fake request object from a JSON cache file.

    The returned ``DummyObject`` carries ``local_timestamp``, ``method``,
    ``url``, ``headers``, ``body`` and ``raw_request`` (request line, one line
    per header and, when non-empty, the body).

    :param file_path: The file path for the cache file
    :type file_path: `str`
    :return: Fake request object populated from the cache entry
    :rtype: `DummyObject`
    :raises ValueError: If the stored timestamp matches neither ISO layout
    """
    # Context manager so the file handle is closed deterministically.
    with open(file_path, 'r') as cache_file:
        cache_dict = json.load(cache_file)

    timestamp = cache_dict["request_local_timestamp"].strip("\r\n")
    try:
        local_timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        # datetime.isoformat() omits the fractional part when the microsecond
        # is 0, so such timestamps need the layout without "%f".
        local_timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')

    method = cache_dict["request_method"]
    url = cache_dict["request_url"]
    headers = cache_dict["request_headers"]
    body = cache_dict["request_body"]

    raw_parts = ["%s %s %s\r\n" % (method, url, cache_dict["request_version"])]
    raw_parts.extend("%s: %s\r\n" % (name, value) for name, value in headers.items())
    if body:
        raw_parts.append("%s\r\n\r\n" % body)

    dummy_request = DummyObject()
    dummy_request.local_timestamp = local_timestamp
    dummy_request.method = method
    dummy_request.url = url
    dummy_request.headers = headers
    dummy_request.body = body
    dummy_request.raw_request = ''.join(raw_parts)
    return dummy_request