Source code for owtf.proxy.cache_handler

"""
owtf.proxy.cache_handler
~~~~~~~~~~~~~~~~~~~~~~~~

Inbound Proxy Module developed by Bharadwaj Machiraju (blog.tunnelshade.in) as a part of Google Summer of Code 2013
"""
import base64
import datetime
import hashlib
import json
import logging
import os
import re
import traceback

import tornado.httputil

from owtf.lib.filelock import FileLock


class CacheHandler(object):
    """Load proxied transactions from, and dump them to, the on-disk cache.

    This class is used by the request handler. The main things done here:

    * The ``request_hash`` is generated here
    * The file locks are managed here
    * ``.rd`` files are created here
    """

    def __init__(self, cache_dir, request, cookie_regex, blacklist):
        """Store the collaborators needed to hash, load and dump a request.

        :param cache_dir: Root cache directory
        :param request: HTTP request object
        :param cookie_regex: Regular expression applied to the ``Cookie`` header
        :param blacklist: When true, text matching ``cookie_regex`` is removed
            from the cookie string used for hashing; otherwise only the
            matching text is used
        """
        self.request = request
        self.cache_dir = cache_dir
        self.cookie_regex = cookie_regex
        self.blacklist = blacklist

    @staticmethod
    def _to_bytes(value):
        """Return ``value`` as bytes so that it can be fed to the hash."""
        if value is None:
            return b''
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def _header(self, name):
        """Return the request header ``name``, or ``None`` when it is absent."""
        try:
            return self.request.headers[name]
        except KeyError:
            return None

    def calculate_hash(self, callback=None):
        """Compute ``request_hash`` and ``file_path`` for the current request.

        Based on the blacklist boolean the cookie regex is used for filtering
        of cookies in the hash input. The original request is not modified.

        :param callback: Optional callable invoked with the request hash
        :return: None
        """
        cookie_string = ''
        cookie_header = self._header('Cookie')
        if cookie_header is not None:
            if self.blacklist:
                # Drop everything the regex matches, then squeeze out spaces.
                remaining = re.sub(self.cookie_regex, '', cookie_header).strip()
                cookie_string = ''.join(remaining.split(' '))
            else:
                # Keep only what the regex matches.
                matches = re.findall(self.cookie_regex, cookie_header)
                cookie_string = ''.join(
                    item.strip() for group in matches for item in group if item)

        parts = [
            self.request.method,
            self.request.url,
            self.request.version,
            self.request.body,
            cookie_string,
        ]
        # User-Agent: to support proxying of ua-tester.
        # Sec-Websocket-Key: websocket caching technique.
        for header_name in ("User-Agent", "Sec-Websocket-Key"):
            header_value = self._header(header_name)
            if header_value is not None:
                parts.append(header_value)

        # hashlib only accepts bytes, and the body may already be bytes while
        # the other parts are text, so normalise every part before hashing.
        md5_hash = hashlib.md5()
        md5_hash.update(b''.join(self._to_bytes(part) for part in parts))
        self.request_hash = md5_hash.hexdigest()
        # This is the path to file inside url folder. This can be used for
        # updating a html file
        self.file_path = os.path.join(self.cache_dir, self.request_hash)
        if callback:
            callback(self.request_hash)

    def create_response_object(self):
        """Create a proxy response object from the cache file.

        :return: The object built by ``response_from_cache`` for this request
        """
        return response_from_cache(os.path.join(self.cache_dir, self.request_hash))

    def dump(self, response):
        """Dump the request and response data to the cache file.

        It also creates an empty ``.rd`` file with the same file name and
        releases ``self.file_lock``.

        .. note::
            This is used by transaction logger

        :param response: The proxy response
        :return: None
        """
        try:
            try:
                response_body = self.request.response_buffer.decode("utf-8")
                binary_response = False
            except UnicodeDecodeError:
                # Store non UTF-8 payloads as base64 *text*; raw bytes are not
                # JSON serialisable.
                response_body = base64.b64encode(
                    self.request.response_buffer).decode("ascii")
                binary_response = True
            cache_dict = {
                'request_method': self.request.method,
                'request_url': self.request.url,
                'request_version': self.request.version,
                'request_headers': dict(self.request.headers),
                'request_body': self.request.body.decode('utf-8'),
                'request_time': response.request_time,
                'request_local_timestamp': self.request.local_timestamp.isoformat(),
                'response_code': response.code,
                'response_headers': dict(response.headers),
                'response_body': response_body,
                'response_cookies': response.headers.get_list("Set-Cookie"),
                'binary_response': binary_response
            }
            # Serialise first so that a serialisation error cannot leave a
            # truncated cache file behind.
            serialized = json.dumps(cache_dict)
            with open(self.file_path, 'w') as outfile:
                outfile.write(serialized)
            # This approach can be used as an alternative for object sharing
            # This creates a file with hash as name and .rd as extension
            with open('%s.rd' % self.file_path, 'w'):
                pass
        finally:
            # Always give the lock back, even when dumping failed.
            self.file_lock.release()

    def load(self):
        """Return the cached response for the request, if there is one.

        This is the function which is called for every request. If the file
        is not found in cache, a file lock is created for it and ``None`` is
        returned.

        :return: The cached response object, or ``None`` on a cache miss
        """
        if not hasattr(self, 'file_path'):
            self.calculate_hash()
        if os.path.isfile(self.file_path):
            return self.create_response_object()

        self.file_lock = FileLock(self.file_path)
        try:
            self.file_lock.acquire()
        except Exception:
            # The lock module's timeout exception type is not imported in this
            # module, so catch broadly: a failed acquire is only logged and
            # execution continues with the re-check below.
            logging.debug("Lock could not be acquired %s", traceback.format_exc())
        # For handling race conditions
        if os.path.isfile(self.file_path):
            self.file_lock.release()
            return self.create_response_object()
        return None
class DummyObject(object):
    """Bare attribute container used to build fake request/response objects.

    Callers attach whatever attributes they need after construction.
    """

    def __init__(self):
        # Marker attribute set on every instance.
        self.dummy_obj = True
def response_from_cache(file_path):
    """Build a fake response object from a cache file.

    :param file_path: The file path for the cache file
    :type file_path: `str`
    :return: A ``DummyObject`` carrying ``code``, ``headers``,
        ``header_string``, ``body``, ``request_time`` and ``cookies``
    """
    # Use a context manager so the cache file handle is closed promptly.
    with open(file_path, 'r') as cache_file:
        cache_dict = json.load(cache_file)

    # Temp object is created as an alternative to use lists (or) dictionaries
    # for passing values
    dummy_response = DummyObject()
    dummy_response.code = cache_dict["response_code"]
    dummy_response.headers = tornado.httputil.HTTPHeaders(cache_dict["response_headers"])
    dummy_response.header_string = '\r\n'.join(
        "%s: %s" % (name, value) for name, value in cache_dict["response_headers"].items())
    if cache_dict["binary_response"] is True:
        # Binary bodies are stored base64 encoded in the cache file.
        dummy_response.body = base64.b64decode(cache_dict["response_body"])
    else:
        dummy_response.body = cache_dict["response_body"]
    dummy_response.request_time = cache_dict["request_time"]
    dummy_response.cookies = cache_dict["response_cookies"]
    return dummy_response
def request_from_cache(file_path):
    """Build a fake request object from a cache file.

    :param file_path: The file path for the cache file
    :type file_path: `str`
    :return: A ``DummyObject`` carrying ``local_timestamp``, ``method``,
        ``url``, ``headers``, ``body`` and ``raw_request``
    """
    # Use a context manager so the cache file handle is closed promptly.
    with open(file_path, 'r') as cache_file:
        cache_dict = json.load(cache_file)

    timestamp = cache_dict["request_local_timestamp"].strip("\r\n")
    try:
        local_timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        # datetime.isoformat() leaves out the fractional part when the
        # microsecond is 0, so accept that shape as well.
        local_timestamp = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')

    dummy_request = DummyObject()
    dummy_request.local_timestamp = local_timestamp
    dummy_request.method = cache_dict["request_method"]
    dummy_request.url = cache_dict["request_url"]
    dummy_request.headers = cache_dict["request_headers"]
    dummy_request.body = cache_dict["request_body"]

    # Rebuild a textual form of the request: request line, headers, body.
    raw_parts = ["%s %s %s\r\n" % (cache_dict["request_method"], cache_dict["request_url"],
                                   cache_dict["request_version"])]
    raw_parts.extend(
        "%s: %s\r\n" % (name, value) for name, value in cache_dict["request_headers"].items())
    if cache_dict["request_body"]:
        # NOTE(review): no blank line is emitted between the headers and the
        # body; kept as-is because consumers may rely on this exact output.
        raw_parts.append("%s\r\n\r\n" % cache_dict["request_body"])
    dummy_request.raw_request = ''.join(raw_parts)
    return dummy_request