Source code for owtf.proxy.cache_handler

"""
owtf.proxy.cache_handler
~~~~~~~~~~~~~~~~~~~~~~~~

Inbound Proxy Module developed by Bharadwaj Machiraju (blog.tunnelshade.in) as a part of Google Summer of Code 2013
"""
import base64
import datetime
import hashlib
import json
import logging
import os
import re
import traceback

import tornado.httputil

from owtf.lib.filelock import FileLock
from owtf.utils.strings import to_str, utf8


[docs]class CacheHandler(object): """ This class will be used by the request handler to either load or dump to cache. Main things that are done here :- * The request_hash is generated here * The file locks are managed here * .rd files are created here """ def __init__(self, cache_dir, request, cookie_regex, blacklist): # Initialized with the root cache directory, HTTP request object, cookie_regex, blacklist boolean self.request = request self.cache_dir = cache_dir self.cookie_regex = cookie_regex self.blacklist = blacklist
[docs] def calculate_hash(self, callback=None): """ Based on blacklist boolean the cookie regex is used for filtering of cookies in request_hash generation. However the original request is not tampered. :param callback: Callback function :type callback: :return: :rtype: """ cookie_string = "" try: if self.blacklist: string_with_spaces = re.sub( self.cookie_regex, "", self.request.headers["Cookie"] ).strip() cookie_string = "".join(string_with_spaces.split(" ")) else: cookies_matrix = re.findall( self.cookie_regex, self.request.headers["Cookie"] ) for cookie_tuple in cookies_matrix: for item in cookie_tuple: if item: cookie_string += item.strip() except KeyError: pass request_mod = self.request.method + self.request.url + self.request.version request_mod = request_mod + to_str(self.request.body) + cookie_string # To support proxying of ua-tester try: request_mod = request_mod + self.request.headers["User-Agent"] except KeyError: pass # Websocket caching technique try: request_mod = request_mod + self.request.headers["Sec-Websocket-Key"] except KeyError: pass md5_hash = hashlib.md5() md5_hash.update(utf8(request_mod)) self.request_hash = md5_hash.hexdigest() # This is the path to file inside url folder. This can be used for updating a html file self.file_path = os.path.join(self.cache_dir, self.request_hash) if callback: callback(self.request_hash)
[docs] def create_response_object(self): """Create a proxy response object from cache file :return: :rtype: """ return response_from_cache(os.path.join(self.cache_dir, self.request_hash))
[docs] def dump(self, response): """This function takes in a HTTPResponse object and dumps the request and response data. It also creates a .rd file with same file name .. note:: This is used by transaction logger :param response: The proxy response :type response: :return: :rtype: """ try: response_body = self.request.response_buffer binary_response = False except UnicodeDecodeError: response_body = base64.b64encode(self.request.response_buffer) binary_response = True cache_dict = { "request_method": self.request.method, "request_url": self.request.url, "request_version": self.request.version, "request_headers": dict(self.request.headers), "request_body": to_str(self.request.body), "request_time": response.request_time, "request_local_timestamp": self.request.local_timestamp.isoformat(), "response_code": response.code, "response_headers": dict(response.headers), "response_body": response_body, "response_cookies": response.headers.get_list("Set-Cookie"), "binary_response": binary_response, } with open(self.file_path, "w") as outfile: json.dump(cache_dict, outfile) # This approach can be used as an alternative for object sharing # This creates a file with hash as name and .rd as extension open("{}.rd".format(self.file_path), "w").close() self.file_lock.release()
[docs] def load(self): """This is the function which is called for every request. If file is not found in cache, then a file lock is created for that and a None is returned. :return: Load a transaction from cache :rtype: """ try: dummy = self.file_path except Exception: self.calculate_hash() finally: if os.path.isfile(self.file_path): return self.create_response_object() else: self.file_lock = FileLock(self.file_path) try: self.file_lock.acquire() except FileLockTimeoutException: logging.debug("Lock could not be acquired %s", traceback.print_exc) # For handling race conditions if os.path.isfile(self.file_path): self.file_lock.release() return self.create_response_object() else: return None
[docs]class DummyObject(object): """ This class is just used to create a fake response object """ def __init__(self): self.dummy_obj = True
[docs]def response_from_cache(file_path): """A fake response object is created with necessary attributes :param file_path: The file path for the cache file :type file_path: `str` :return: :rtype: """ dummy_response = DummyObject() with open(file_path, "r") as f: cache_dict = json.loads(f.read()) dummy_response.code = cache_dict["response_code"] dummy_response.headers = tornado.httputil.HTTPHeaders( cache_dict["response_headers"] ) dummy_response.header_string = "\r\n".join( [ "{!s}: {!s}".format(name, value) for name, value in cache_dict["response_headers"].items() ] ) if cache_dict["binary_response"] is True: dummy_response.body = base64.b64decode(cache_dict["response_body"]) else: dummy_response.body = cache_dict["response_body"] dummy_response.request_time = cache_dict["request_time"] dummy_response.cookies = cache_dict["response_cookies"] # Temp object is created as an alternative to use lists (or) dictionaries for passing values return dummy_response
[docs]def request_from_cache(file_path): """A fake request object is created with necessary attributes :param file_path: The file path for the cache file :type file_path: `str` :return: :rtype: """ dummy_request = DummyObject() with open(file_path, "r") as f: cache_dict = json.loads(f.read()) dummy_request.local_timestamp = datetime.datetime.strptime( cache_dict["request_local_timestamp"].strip("\r\n"), "%Y-%m-%dT%H:%M:%S.%f" ) dummy_request.method = cache_dict["request_method"] dummy_request.url = cache_dict["request_url"] dummy_request.headers = cache_dict["request_headers"] dummy_request.body = cache_dict["request_body"] dummy_request.raw_request = "{!s} {!s} {!s}\r\n".format( cache_dict["request_method"], cache_dict["request_url"], cache_dict["request_version"], ) for name, value in cache_dict["request_headers"].items(): dummy_request.raw_request += "{!s}: {!s}\r\n".format(name, value) if cache_dict["request_body"]: dummy_request.raw_request += "{!s}\r\n\r\n".format(cache_dict["request_body"]) return dummy_request