百度图片url解码
使用query词抓取百度图片数据的时候,为了简化抓取,使用firebug跟踪到百度图片请求返回了一个json格式数据,例如http://image.baidu.com/i?tn=resultjson_com&ie=gbk&word=%B7%B6%B1%F9%B1%F9&cg=girl&pn=0&rn=60(该url已简化)。我们只需抓取这个json数据即可,但是这个json数据中的objectUrl和fromUrl却是加密了的数据,如ippr_z2C$qAzdH3FAzdH3Fjgp_z&e3Bvg6_z&e3BvgAzdH3FstfpAzdH3Fda8candnAzdH3FWada8candnn98c9lndadl9_z&e3B3r2。后来又使用firebug发现,鼠标点击链接时并没有向百度发送解密请求,故判断解密程序在javascript中。
本来打算使用SpiderMonkey执行js来解密,但今天在网上偶然看到一段js解密url的代码,来自百度知道http://zhidao.baidu.com/link?url=APKpO_ktmLa4QE0ZYlOqKyNdLul4rkDbV-LXBeDv40sHe0yDXS-LGc4A3ArNL2RoSY4xD2Z8M_5vdmjm2nAOILhYWXE1ErtGMiPbi-paK4e,测试了下是对的,到这里这个问题总算搞定了。
解密方法很简单,密钥是一张字符对应关系表,有2种映射:(1)多个字符映射为一个字符:"_z2C$q"=>":","_z&e3B"=>".","AzdH3F"=>"/";(2)单个字符映射为单个字符。先替换多字符标记,再逐个替换单字符,即可将密文解密为明文,实现并不困难。
C语言的实现:
#include <stdio.h>
#include <string.h>

enum {
    TOKEN_LEN = 6,   /* every multi-character escape token is 6 bytes long */
    TABLE_SIZE = 128 /* the substitution table covers 7-bit ASCII only */
};

/**
 * Decode an obfuscated Baidu image URL.
 *
 * Two kinds of substitution are applied while scanning src left to right:
 *   1. the 6-character tokens "_z2C$q", "_z&e3B" and "AzdH3F" become
 *      ':', '.' and '/' respectively;
 *   2. every other character c is replaced by table[c] when that entry is
 *      non-zero, and copied unchanged otherwise.
 *
 * @param src   NUL-terminated encoded string.
 * @param table substitution table with at least TABLE_SIZE entries, indexed
 *              by character code; a 0 entry means "keep the character".
 * @param dest  output buffer; the decoded text is never longer than the
 *              input, so strlen(src) + 1 bytes are sufficient.
 * @return number of characters written to dest, excluding the terminator.
 */
int Decode(const char *src, const char *table, char *dest)
{
    char *out = dest;
    const char *p = src;

    while (*p != '\0') {
        if (*p == '_' || *p == 'A') {
            /* Possible start of a multi-character token. strncmp stops at
             * the NUL terminator, so a short tail is never over-read. */
            char sep = 0;

            if (strncmp(p, "_z2C$q", TOKEN_LEN) == 0) {
                sep = ':';
            } else if (strncmp(p, "_z&e3B", TOKEN_LEN) == 0) {
                sep = '.';
            } else if (strncmp(p, "AzdH3F", TOKEN_LEN) == 0) {
                sep = '/';
            }

            if (sep != 0) {
                *out++ = sep;
                p += TOKEN_LEN;
            } else {
                /* A lone '_' or 'A' is copied literally, bypassing the table. */
                *out++ = *p++;
            }
            continue;
        }

        /* Index through unsigned char: plain char may be signed, and a byte
         * outside 0..127 must not be used as an index into the table. */
        unsigned char c = (unsigned char)*p++;
        char mapped = (c < TABLE_SIZE) ? table[c] : 0;

        *out++ = (mapped != 0) ? mapped : (char)c;
    }

    *out = '\0';
    return (int)(out - dest);
}

int main(int argc, char **argv)
{
    /* Single-character substitution table: encoded character -> plain character. */
    static const char table[TABLE_SIZE] = {
        ['w'] = 'a', ['k'] = 'b', ['v'] = 'c', ['1'] = 'd', ['j'] = 'e',
        ['u'] = 'f', ['2'] = 'g', ['i'] = 'h', ['t'] = 'i', ['3'] = 'j',
        ['h'] = 'k', ['s'] = 'l', ['4'] = 'm', ['g'] = 'n', ['5'] = 'o',
        ['r'] = 'p', ['q'] = 'q', ['6'] = 'r', ['f'] = 's', ['p'] = 't',
        ['7'] = 'u', ['e'] = 'v', ['o'] = 'w', ['8'] = '1', ['d'] = '2',
        ['n'] = '3', ['9'] = '4', ['c'] = '5', ['m'] = '6', ['0'] = '7',
        ['b'] = '8', ['l'] = '9', ['a'] = '0'
    };
    const char *url = "ippr_z2C$qAzdH3FAzdH3Ft428_z&e3Bd0_z&e3BvgAzdH3Ft4w2jfAzdH3Fda8a8aAzdH3FddAzdH3F8db00d0dnd_9amln8aa_z&e3B3r2";
    char dest[1024];

    (void)argc;
    (void)argv;

    Decode(url, table, dest);
    printf("%s\n", dest);
    return 0;
}
/* Author's note: written by hand; efficiency is not guaranteed. */
输出:http://img1.27.cn/images/201010/22/1287727232_40693100.jpg
JavaScript的实现:
<script type="text/javascript"> var f = { w: "a", k: "b", v: "c", 1: "d", j: "e", u: "f", 2: "g", i: "h", t: "i", 3: "j", h: "k", s: "l", 4: "m", g: "n", 5: "o", r: "p", q: "q", 6: "r", f: "s", p: "t", 7: "u", e: "v", o: "w", 8: "1", d: "2", n: "3", 9: "4", c: "5", m: "6", 0: "7", b: "8", l: "9", a: "0", _z2C$q: ":", "_z&e3B": ".", AzdH3F: "/" }; var url = "ippr_z2C$qAzdH3FAzdH3Fjgp_z&e3Bvg6_z&e3BvgAzdH3FstfpAzdH3Fda8candnAzdH3FWada8candnn98c9lndadl9_z&e3B3r2"; var h = /(_z2C$q|_z&e3B|AzdH3F)/g; var e = url.replace(h, function(t, e) { return f[e] }); var s = /([a-wd])/g; e = e.replace(s, function(t, e) { return f[e] }); document.write(e); </script>
输出:http://ent.cnr.cn/list/20150323/W020150323341549320294.jpg
PHP的实现:
<?PHP
// Substitution table: encoded token => plain character. Single quotes are
// used throughout so that PHP does not treat "$q..." as a variable.
$f = array(
    'w'=>'a','k'=>'b','v'=>'c','1'=>'d','j'=>'e','u'=>'f','2'=>'g','i'=>'h',
    't'=>'i','3'=>'j','h'=>'k','s'=>'l','4'=>'m','g'=>'n','5'=>'o','r'=>'p',
    'q'=>'q','6'=>'r','f'=>'s','p'=>'t','7'=>'u','e'=>'v','o'=>'w','8'=>'1',
    'd'=>'2','n'=>'3','9'=>'4','c'=>'5','m'=>'6','0'=>'7','b'=>'8','l'=>'9',
    'a'=>'0','_z2C$q'=>':','_z&e3B'=>'.','AzdH3F'=>'/'
);

// Single quotes are required here: the encoded URL contains '$'.
$url = 'ippr_z2C$qAzdH3FAzdH3Ft4f_z&e3Bw6ptg2nmc_z&e3Bv54AzdH3Fg51jAzdH3F8da90bAzdH3F';

// Pass 1: multi-character tokens. '$' is escaped so it matches literally
// instead of acting as the end-of-subject anchor.
$url = preg_replace_callback('/_z2C\$q|_z&e3B|AzdH3F/', function ($matches) use ($f) {
    return $f[$matches[0]];
}, $url);

// Pass 2: single characters, letters a-w and digits (\d).
$url = preg_replace_callback('/[a-w\d]/', function ($matches) use ($f) {
    return $f[$matches[0]];
}, $url);

echo $url;
?>
- 上一篇:没有了
- 下一篇:没有了