javascript脚本压缩工具JSEncoder实现

1、算法原理：
从javascript脚本文件中提取所有单词并存入字典表（字典以“|”分隔的字符串形式保存），然后把每个单词在字典中的序号（采用仿base64的编码值）写回原代码中该单词所在的位置，形成压缩后的js代码；运行时由附带的解码函数按字典还原。

2、压缩效果：
jquery-1.2.3.js原始文件大小95kb->[其他工具处理，去掉回车、注释等]jquery-1.2.3.min.js，大小53kb
=>本文工具压缩后：32kb

3、可选其他工具：
JSA 2.0 pre-alpha：http://sourceforge.net/project/showfiles.php?group_id=175776
packer：http://dean.edwards.name/packer/

4、下载（包含源代码在jar文件中）
2008.4 Ver：0.5 下载

5、源代码
java代码如下，写完代码之后才发现这是JSA(http://sourceforge.net/project/showfiles.php?group_id=175776)的压缩算法的再实现，不过好像作者没有开源，
本文算作是一种技术上的研究了。
已使用jquery-1.2.3.min.js（http://code.google.com/p/jqueryjs/downloads/detail?name=jquery-1.2.3.min.js）
进行压缩测试，压缩后的脚本运行正常，压缩率>40%。

package com.cngd.jstool;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * JSEncoder: a dictionary-based JavaScript compression tool.
 * <p>
 * Every word (a run of letters, digits, '_' or '$') in the script is stored once in a
 * dictionary and replaced in place by a short base-64-like code. The result is a
 * self-extracting {@code eval(...)} stub that carries the encoded script and the dictionary.
 * <p>
 * Author's note: only after writing the code did it turn out to be a re-implementation of the
 * compression algorithm of JSA (http://sourceforge.net/project/showfiles.php?group_id=175776).
 * Results for jquery-1.2.3.min.js:
 * <pre>
 * original | JSEncoder | JSA-20071021 (2.0 pre-alpha) | jquery packer
 * ---------+-----------+------------------------------+--------------
 * 53kb     | 32kb      | 29kb                         | 29kb
 * </pre>
 * JSA additionally compresses local variable names, which is why its output is smaller.
 * <p>
 * User: (在路上 http://www.cnblogs.com/midea0978)
 * Date: 2008-4-18
 * Version:0.5
 */
public class JSEncoder {
    /**
     * Alphabet of the token codes: the character at index {@code i} is the digit with value
     * {@code i}. It must stay in sync with the decoder function {@code C} emitted by
     * {@link #formatCode}.
     */
    public static final String ENCODE_BASE64 =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";

    /** When {@code true}, {@link #encode} prints the dictionary and intermediate results. */
    public boolean isDebug = false;

    /** A word: the unit that is stored in the dictionary and replaced by a code. */
    private static final Pattern WORD = Pattern.compile("[\\w\\$]+");

    /**
     * Compresses a JavaScript file.
     * <p>
     * Line breaks are replaced by spaces before encoding, so the script must not depend on
     * them (for example {@code //} line comments); the tool is meant for already-minified input.
     * If the script contains no word at all (e.g. an empty file) it is returned unchanged,
     * because the decoder stub cannot run with an empty dictionary.
     *
     * @param filename js filename
     * @param offset   offset &gt;= 0; shifts the codes assigned to the dictionary entries.
     *                 Different offsets move the position of the code table, and smaller
     *                 offsets give a better compression ratio.
     * @return the compressed, self-extracting script
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if the offset makes a code value negative
     */
    public String encode(String filename, int offset) throws Exception {
        String source = readFileData(filename);
        int size = source.length();

        // The script is embedded in a single-quoted JS string literal: flatten it to one
        // line, then escape backslashes first and single quotes second.
        String jscript = source.replace('\n', ' ');
        jscript = jscript.replace("\\", "\\\\");
        jscript = jscript.replace("'", "\\'");

        Matcher m = WORD.matcher(jscript);
        List<String> dict = new ArrayList<String>();
        Map<String, Integer> indexByWord = new HashMap<String, Integer>();
        StringBuffer encscript = new StringBuffer();

        debugInfo("=====编码字典对应表=====");
        while (m.find()) {
            String element = m.group();
            Integer index = indexByWord.get(element);
            if (index == null) {
                index = Integer.valueOf(dict.size());
                dict.add(element);
                indexByWord.put(element, index);
            }
            debugInfo(index + "==>" + element);
            // Codes may contain '$', which appendReplacement would treat as a group reference.
            String code = base64Encode(offset + index.intValue() + 1);
            m.appendReplacement(encscript, Matcher.quoteReplacement(code));
        }
        m.appendTail(encscript);
        debugInfo("===== 编码字典结束 =====");
        debugInfo("Offset=" + offset + ",字典大小=" + dict.size());

        if (dict.isEmpty()) {
            // Nothing to compress; the stub's "M[0]" would fail on a null match list.
            return source;
        }

        debugInfo("压缩后的代码：\n" + encscript.toString());
        StringBuilder dicttab = new StringBuilder();
        for (String word : dict) {
            if (dicttab.length() > 0) {
                dicttab.append('|');
            }
            dicttab.append(word);
        }
        String dictstr = dicttab.toString();
        debugInfo("字典字符串:\n" + dictstr);

        String res = formatCode(encscript.toString(), dictstr, dict.size(), offset);
        int packsize = res.length();
        DecimalFormat df = new DecimalFormat("######.0");
        System.out.println("\n原始文件大小：" + size + "\n压缩后文件大小：" + packsize);
        System.out.println("=================\n压缩比率：" + df.format((size - packsize) * 100.0 / size) + "%");
        return res;
    }

    /**
     * Reads a whole text file, terminating every line with {@code '\n'}.
     *
     * @param filename file to read (decoded with the platform default charset)
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileData(String filename) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Prints {@code txt} to stdout when {@link #isDebug} is set. */
    private void debugInfo(String txt) {
        if (isDebug) {
            System.out.println(txt);
        }
    }

    /**
     * Command line entry point: {@code jsfile outputfile [offset]}.
     *
     * @param args input file, output file and an optional non-negative offset (default 0)
     */
    public static void main(String[] args) {
        System.out.println("JSEncoder 0.5 by midea0978 2008.4");
        System.out.println("=====================================");
        System.out.println("http://www.cnblogs.com/midea0978\n");
        if (args.length < 2) {
            System.out.println("Usage:java -jar JSEncoder.jar jsfile outputfile [offset].");
            System.exit(0);
        }
        try {
            System.out.println("输入文件: " + args[0]);
            System.out.println("输出文件: " + args[1]);
            JSEncoder util = new JSEncoder();
            int offset = args.length >= 3 ? Integer.parseInt(args[2]) : 0;
            String code = util.encode(args[0], offset);
            FileOutputStream fs = new FileOutputStream(args[1]);
            try {
                fs.write(code.getBytes());
            } finally {
                fs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Base-64-like encoding of a number: its base-64 digits, most significant first, written
     * with the {@link #ENCODE_BASE64} alphabet.
     *
     * @param c the number to encode
     * @return the code for {@code c}
     * @throws IllegalArgumentException if {@code c} is negative
     */
    private String base64Encode(int c) {
        if (c < 0) {
            throw new IllegalArgumentException("Error:Offset必须>=0.");
        }
        StringBuilder code = new StringBuilder();
        do {
            code.append(ENCODE_BASE64.charAt(c & 63));
            c >>= 6;
        } while (c > 0);
        return code.reverse().toString();
    }

    /**
     * Wraps the encoded script and its dictionary in the self-extracting decoder stub.
     *
     * @param enc    encoded script, already escaped for a single-quoted JS string
     * @param dict   dictionary words joined with '|'
     * @param size   number of dictionary words
     * @param offset the offset that was used when assigning codes
     * @return the complete JavaScript output
     */
    private String formatCode(String enc, String dict, int size, int offset) {
        StringBuilder str = new StringBuilder();
        // Decoder: C(n) rebuilds the code of n, K maps code -> word, N looks a code up.
        str.append("/* Compressed by JSEncoder */\n"
                + "eval(function(E,I,A,D,J,K,L,H){"
                + "function C(A){return A<62?String.fromCharCode(A+=A<26?65:A<52?71:-4)"
                + ":A<63?'_':A<64?'$':C(A>>6)+C(A&63)}"
                + "while(A>0)K[C(D--)]=I[--A];"
                + "function N(A){return K[A]==L[A]?A:K[A]}"
                + "if(''.replace(/^/,String)){"
                + "var M=E.match(J),B=M[0],F=E.split(J),G=0;"
                + "if(E.indexOf(F[0]))F=[''].concat(F);"
                + "do{H[A++]=F[G++];H[A++]=N(B)}while(B=M[G]);"
                + "H[A++]=F[G]||'';return H.join('')}"
                + "return E.replace(J,N)}(");
        str.append("'" + enc + "',");
        str.append("'" + dict + "'.split('|'),");
        str.append(size + "," + (size + offset) + ",/[\\w\\$]+/g, {}, {}, []))");
        return str.toString();
    }

}

来源：http://www.cnblogs.com/midea0978

热门分类

热门标签

推荐阅读