From 3374374af0812ad8c0319a77d2890adcff70400a Mon Sep 17 00:00:00 2001
From: John Bintz
Date: Tue, 14 Oct 2008 12:54:35 -0400
Subject: [PATCH] start working on reducing the size of serialized data

---
 src/CodeParser.hx                 | 61 ++++++++++++++-----------------
 src/ConstantToken.hx              |  2 +-
 src/ConstantTokenProcessor.hx     |  3 +-
 src/FunctionToken.hx              |  2 +-
 src/FunctionTokenProcessor.hx     |  3 +-
 src/JavaScriptTarget.hx           |  8 ++--
 src/RegenerateDataFiles.hx        |  2 +-
 src/TestFunctionTokenProcessor.hx | 35 +++++++++++++-----
 src/Token.hx                      | 12 +++---
 src/TokenProcessor.hx             | 19 ++++++++++
 10 files changed, 90 insertions(+), 57 deletions(-)

diff --git a/src/CodeParser.hx b/src/CodeParser.hx
index 482f9ae..136a17f 100644
--- a/src/CodeParser.hx
+++ b/src/CodeParser.hx
@@ -6,6 +6,8 @@ class CodeParser {
   public var token_processors(get_token_processors, null) : Hash;
   public var ignored_modules(get_ignored_modules, null) : Hash;
 
+  public static var processor_types = [ "FunctionTokenProcessor", "ConstantTokenProcessor" ];
+
   public function new() {
     this.token_processors = new Hash();
     this.ignored_modules = new Hash();
@@ -16,34 +18,30 @@ class CodeParser {
     Load all possible token processors from disk.
   **/
   public function load_processors_from_disk() {
-    var function_processor = new FunctionTokenProcessor();
-    if (!function_processor.load_from_cache()) {
-      function_processor.populate_from_file();
-      function_processor.save_to_cache();
+    for (processor_type_name in processor_types) {
+      var processor : TokenProcessor = Type.createInstance(Type.resolveClass(processor_type_name), []);
+
+      if (!processor.load_from_cache()) {
+        processor.populate_from_file();
+        processor.save_to_cache();
+      }
+
+      this.token_processors.set(processor_type_name, processor);
     }
-
-    this.token_processors.set(Type.getClassName(Type.getClass(function_processor)), function_processor);
-
-    var constant_processor = new ConstantTokenProcessor();
-    if (!constant_processor.load_from_cache()) {
-      constant_processor.populate_from_files();
-      constant_processor.save_to_cache();
-    }
-
-    this.token_processors.set(Type.getClassName(Type.getClass(constant_processor)), constant_processor);
   }
 #end
 
+  /**
+    Load all possible token processors from haXe Resources.
+  **/
   public function load_processors_from_resources() {
-    var function_processor = new FunctionTokenProcessor();
-    function_processor.load_from_resource();
+    for (processor_type_name in processor_types) {
+      var processor : TokenProcessor = Type.createInstance(Type.resolveClass(processor_type_name), []);
 
-    this.token_processors.set(Type.getClassName(Type.getClass(function_processor)), function_processor);
+      processor.load_from_resource();
 
-    var constant_processor = new ConstantTokenProcessor();
-    constant_processor.load_from_resource();
-
-    this.token_processors.set(Type.getClassName(Type.getClass(constant_processor)), constant_processor);
+      this.token_processors.set(processor_type_name, processor);
+    }
   }
 
   public function get_token_processors() { return this.token_processors; }
@@ -63,9 +61,6 @@ class CodeParser {
     var results = new Array();
     this.ignored_modules = new Hash();
 
-    var function_token_processor = this.token_processors.get("FunctionTokenProcessor");
-    var constant_token_processor = this.token_processors.get("ConstantTokenProcessor");
-
     var tokens_found = new Hash();
     var tokens_to_ignore = new Array();
     var flattened_tokens = new Hash();
@@ -119,17 +114,13 @@ class CodeParser {
 
           if (!tokens_found.exists(token)) {
             if (!flattened_tokens.exists(token)) {
-              if (is_function) {
-                if (function_token_processor.tokenHash.exists(token)) {
-                  results.push(function_token_processor.tokenHash.get(token).toResult());
-                }
-              } else {
-                if (constant_token_processor.tokenHash.exists(token)) {
-                  results.push(constant_token_processor.tokenHash.get(token).toResult());
+              for (token_processor in this.token_processors.iterator()) {
+                if (token_processor.tokenHash.exists(token)) {
+                  results.push(token_processor.tokenHash.get(token).toResult()); break;
                 }
               }
+              tokens_found.set(token, true);
             }
-            tokens_found.set(token, true);
           }
         } else {
           if (current == "/") {
@@ -172,8 +163,10 @@ class CodeParser {
 
     if (is_capturing) {
       var token = s.substr(capture_index, index - capture_index);
-      if (constant_token_processor.tokenHash.exists(token)) {
-        results.push(constant_token_processor.tokenHash.get(token).toResult());
+      for (token_processor in this.token_processors.iterator()) {
+        if (token_processor.tokenHash.exists(token)) {
+          results.push(token_processor.tokenHash.get(token).toResult()); break;
+        }
       }
     }
 
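Worth pausing on the pattern above: both loaders now share one loop that turns a class name into a live object. A minimal sketch of the reflection idiom (the Greeter classes are illustrative, not part of this codebase): Type.resolveClass() looks a class up by name at runtime, and Type.createInstance() invokes its constructor, which is what lets processor_types stay a plain list of strings.

    class Greeter {
        public function new() {}
        public function greet() { return "hello"; }
    }

    class LoudGreeter extends Greeter {
        public function new() { super(); }
        override public function greet() { return "HELLO"; }
    }

    class ReflectionDemo {
        static function main() {
            for (class_name in [ "Greeter", "LoudGreeter" ]) {
                // resolveClass returns null for classes the compiler did not
                // include, so production code should null-check its result.
                var instance : Greeter = Type.createInstance(Type.resolveClass(class_name), []);
                trace(class_name + " -> " + instance.greet());
            }
        }
    }

The trade-off of the string-keyed approach is that a typo in processor_types only fails at runtime, which is why the cache round-trip is covered by tests further down.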
diff --git a/src/ConstantToken.hx b/src/ConstantToken.hx
index 1d65bd1..401f947 100644
--- a/src/ConstantToken.hx
+++ b/src/ConstantToken.hx
@@ -1,5 +1,5 @@
 class ConstantToken extends Token {
-  override public function getTokenType() {
+  override public function get_token_type() {
     return ResultType.Constant;
   }
 }
\ No newline at end of file
diff --git a/src/ConstantTokenProcessor.hx b/src/ConstantTokenProcessor.hx
index fb455ff..09d6a69 100644
--- a/src/ConstantTokenProcessor.hx
+++ b/src/ConstantTokenProcessor.hx
@@ -1,6 +1,7 @@
 class ConstantTokenProcessor extends TokenProcessor {
   public static var cachePath : String = "../data/constant_tokens_cache.hxd";
   override public function get_cache_path() { return ConstantTokenProcessor.cachePath; }
+  override public function get_default_token_type() { return ConstantToken; }
 
   public static var source_path : String = "../data";
   public static var source_file_pattern : EReg = ~/phpdoc_constants_.*\.xml/;
@@ -16,7 +17,7 @@ class ConstantTokenProcessor extends TokenProcessor {
   ];
 
 #if neko
-  public function populate_from_files() {
+  public override function populate_from_file() {
     this.tokenHash = new Hash();
     for (file in neko.FileSystem.readDirectory(source_path)) {
       if (source_file_pattern.match(file)) {
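The new get_default_token_type() override is the other half of the design: each processor names the Token subclass that should be rebuilt when a compact cache is rehydrated. haXe treats a class as a first-class Class<T> value, so the base class can instantiate whatever the subclass returned. A small self-contained sketch of that idiom, with stand-in classes rather than the patch's own:

    class Animal {
        public var name : String;
        public function new(n : String) { this.name = n; }
        public function speak() { return "..."; }
    }

    class Dog extends Animal {
        public function new(n : String) { super(n); }
        override public function speak() { return "woof"; }
    }

    class ClassValueDemo {
        // 'kind' plays the role of get_default_token_type()'s return value:
        // chosen by a subclass, instantiated generically by the base class.
        static function make(kind : Class<Animal>, name : String) : Animal {
            return Type.createInstance(kind, [ name ]);
        }

        static function main() {
            trace(make(Dog, "Rex").speak()); // "woof"
        }
    }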
diff --git a/src/FunctionToken.hx b/src/FunctionToken.hx
index 54a73b7..0e8b502 100644
--- a/src/FunctionToken.hx
+++ b/src/FunctionToken.hx
@@ -1,5 +1,5 @@
 class FunctionToken extends Token {
-  override public function getTokenType() {
+  override public function get_token_type() {
     return ResultType.Function;
   }
 }
\ No newline at end of file
diff --git a/src/FunctionTokenProcessor.hx b/src/FunctionTokenProcessor.hx
index bd9f046..0bd0eba 100644
--- a/src/FunctionTokenProcessor.hx
+++ b/src/FunctionTokenProcessor.hx
@@ -2,9 +2,10 @@ class FunctionTokenProcessor extends TokenProcessor {
   public static var cachePath : String = "../data/functions_tokens_cache.hxd";
   override public function get_cache_path() { return FunctionTokenProcessor.cachePath; }
   public static var sourcePath : String = "../data/phpdoc_function_versions.xml";
+  override public function get_default_token_type() { return FunctionToken; }
 
 #if neko
-  public function populate_from_file() {
+  public override function populate_from_file() {
     this.populate_from_string(neko.io.File.getContent(sourcePath));
   }
 
diff --git a/src/JavaScriptTarget.hx b/src/JavaScriptTarget.hx
index 285f229..157170e 100644
--- a/src/JavaScriptTarget.hx
+++ b/src/JavaScriptTarget.hx
@@ -1,3 +1,8 @@
+import FunctionTokenProcessor;
+import ConstantTokenProcessor;
+import FunctionToken;
+import ConstantToken;
+
 /**
   The JavaScript functionality of Harmonious Code.
 **/
@@ -9,9 +14,6 @@ class JavaScriptTarget {
   static public var manually_ignored_modules : Hash;
 
   static public function main() {
-    var function_token = new FunctionToken("a","a");
-    var constant_token = new ConstantToken("a","a");
-
     code_parser = new CodeParser();
     code_parser.load_processors_from_resources();
 
diff --git a/src/RegenerateDataFiles.hx b/src/RegenerateDataFiles.hx
index 51e97cc..843482a 100644
--- a/src/RegenerateDataFiles.hx
+++ b/src/RegenerateDataFiles.hx
@@ -10,7 +10,7 @@ class RegenerateDataFiles {
     var constantProcessor = new ConstantTokenProcessor();
     if (!constantProcessor.load_from_cache()) {
       neko.Lib.print("Regenerating constants cache...\n");
-      constantProcessor.populate_from_files();
+      constantProcessor.populate_from_file();
       constantProcessor.save_to_cache();
     }
   }
diff --git a/src/TestFunctionTokenProcessor.hx b/src/TestFunctionTokenProcessor.hx
index e1405ae..d28593a 100644
--- a/src/TestFunctionTokenProcessor.hx
+++ b/src/TestFunctionTokenProcessor.hx
@@ -1,17 +1,34 @@
 class TestFunctionTokenProcessor extends haxe.unit.TestCase {
-  static var functionName : String = "test";
-  static var functionFrom : String = "5.2";
-  var testXml : String;
-  var tokenProcessor : FunctionTokenProcessor;
+  static var function_name : String = "test";
+  static var function_from : String = "5.2";
+  var token_processor : FunctionTokenProcessor;
 
   public override function setup() {
-    testXml = " ";
-    tokenProcessor = new FunctionTokenProcessor();
-    tokenProcessor.populate_from_string(testXml);
+    var test_xml = " ";
+    token_processor = new FunctionTokenProcessor();
+    token_processor.populate_from_string(test_xml);
   }
 
   public function testGenerateSampleToken() {
-    var testTokenArray = [ new FunctionToken(functionName, functionFrom) ];
-    assertTrue(tokenProcessor.tokenHash.exists(functionName));
+    assertTrue(token_processor.tokenHash.exists(function_name));
+  }
+
+  public function testSerializeInfo() {
+    var test_xml = " ";
+    token_processor.populate_from_string(test_xml);
+
+    var target_token_hash = "{one => { version => PHP 4, PHP 5, token => one }, two => { version => PHP 4, PHP 5, token => two }}";
+
+    assertEquals(target_token_hash, token_processor.tokenHash.toString());
+
+    var unwound_tokens = token_processor.unwind_tokens();
+
+    assertTrue(unwound_tokens.toString().length < target_token_hash.length);
+    trace(unwound_tokens.toString().length + " bytes vs. " + target_token_hash.length);
+
+    token_processor = new FunctionTokenProcessor();
+    token_processor.populate_from_unwound_tokens(unwound_tokens);
+
+    assertEquals(target_token_hash, token_processor.tokenHash.toString());
   }
 }
\ No newline at end of file
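testSerializeInfo exercises the point of the commit: unwind_tokens() flattens the token hash to a name-to-version-string map, the test asserts that the flattened form's string representation is smaller than the full hash's, and then proves the compact form rebuilds an identical tokenHash. A rough stand-alone illustration of where the savings come from, using made-up entries (actual numbers depend on the real token set):

    class SizeDemo {
        static function main() {
            var full = new Hash();     // name -> object standing in for a Token
            var unwound = new Hash();  // name -> version string only
            for (name in [ "one", "two", "three" ]) {
                full.set(name, { token : name, version : "PHP 4, PHP 5" });
                unwound.set(name, "PHP 4, PHP 5");
            }
            // The unwound form drops the per-entry field names and the
            // duplicated token string, so its serialized form is shorter.
            trace(haxe.Serializer.run(full).length
                + " vs. " + haxe.Serializer.run(unwound).length);
        }
    }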
diff --git a/src/Token.hx b/src/Token.hx
index 9f8c74e..b8a6d23 100644
--- a/src/Token.hx
+++ b/src/Token.hx
@@ -1,16 +1,16 @@
 class Token {
-  public var token(getToken, null) : String;
-  public var version(getVersion, null) : String;
-  public var token_type(getTokenType, null) : ResultType;
+  public var token(get_token, null) : String;
+  public var version(get_version, null) : String;
+  public var token_type(get_token_type, null) : ResultType;
 
   public function new(t : String, ?m : String) {
     this.token = t;
     this.version = m;
   }
 
-  public function getToken() { return this.token; }
-  public function getVersion() { return this.version; }
-  public function getTokenType() { return ResultType.Generic; }
+  public function get_token() { return this.token; }
+  public function get_version() { return this.version; }
+  public function get_token_type() { return ResultType.Generic; }
 
   public function toResult() {
     return new Result(this.token_type, this.token, this.version);
diff --git a/src/TokenProcessor.hx b/src/TokenProcessor.hx
index 0a61894..581a818 100644
--- a/src/TokenProcessor.hx
+++ b/src/TokenProcessor.hx
@@ -4,6 +4,7 @@ class TokenProcessor {
   public function new() { this.tokenHash = new Hash(); }
 
   public function get_cache_path() { return TokenProcessor.cachePath; }
+  public function get_default_token_type() { return Token; }
 
 #if neko
   public function load_from_cache() : Bool {
@@ -20,9 +21,27 @@ class TokenProcessor {
     fh.writeString(haxe.Serializer.run(this.tokenHash));
     fh.close();
   }
+
+  public function populate_from_file() {}
 #end
 
   public function load_from_resource() {
     this.tokenHash = haxe.Unserializer.run(haxe.Resource.getString(this.get_cache_path()));
   }
+
+  public function unwind_tokens() : Hash<String> {
+    var unwound_tokens = new Hash();
+    for (token in this.tokenHash.keys()) {
+      unwound_tokens.set(token, this.tokenHash.get(token).version);
+    }
+    return unwound_tokens;
+  }
+
+  public function populate_from_unwound_tokens(unwound_tokens : Hash<String>) {
+    this.tokenHash = new Hash();
+    var token_type = get_default_token_type();
+    for (token in unwound_tokens.keys()) {
+      this.tokenHash.set(token, Type.createInstance(token_type, [ token, unwound_tokens.get(token) ]));
+    }
+  }
 }
\ No newline at end of file
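Note that the patch stops short of wiring the new methods into the cache itself: save_to_cache() still serializes the full tokenHash. A sketch of how the two halves could meet on the neko side once that lands (method and field names are from this patch; the wiring itself is hypothetical):

    class UnwoundCacheDemo {
        static function main() {
            var processor = new FunctionTokenProcessor();
            processor.populate_from_file();

            // Write the compact form: token name -> version string.
            var fh = neko.io.File.write(FunctionTokenProcessor.cachePath, false);
            fh.writeString(haxe.Serializer.run(processor.unwind_tokens()));
            fh.close();

            // Rebuild full FunctionToken objects from the compact cache.
            var restored = new FunctionTokenProcessor();
            restored.populate_from_unwound_tokens(
                haxe.Unserializer.run(neko.io.File.getContent(FunctionTokenProcessor.cachePath)));
        }
    }

Because populate_from_unwound_tokens() routes through get_default_token_type(), the same flow works for ConstantTokenProcessor without further changes.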