Archive

Posts Tagged ‘edoc_lib’

How to escape special characters

February 26th, 2009

It took me a while looking through the Erlang documentation before I found the utility function edoc_lib:escape_uri/1:

1> edoc_lib:escape_uri("This sentence contains & % @ \n to be escaped").
"This%20sentence%20contains%20%26%20%25%20%40%20%a%20to%20be%20escaped"

However, the function does not always produce correct output when you use it to escape a query string for an HTTP POST request.

Let’s try to escape some special characters

2> P1=edoc_lib:escape_uri("This sentence contains & % @").
"This%20sentence%20contains%20%26%20%25%20%40"
3> QueryString1="param1=" ++ P1.
"param1=This%20sentence%20contains%20%26%20%25%20%40"
4> http:request(post, {"http://erlangexamples.com/examples/post.php", [], 
4> "application/x-www-form-urlencoded", QueryString1}, [], []).
{ok,{{"HTTP/1.1",200,"OK"},
[{"connection","Keep-Alive"},
{"date","Fri, 27 Feb 2009 15:47:50 GMT"},
{"server",
"Apache/1.3.41 (Unix) Sun-ONE-ASP/4.0.2 Resin/3.0.25 mod_fastcgi/2.4.6 mod_log_bytes/1.2 mod_bwlimited/1.4 mod_auth_passthrough/1.8 FrontPage/5.0.2.2635 mod_ssl/2.8.31 OpenSSL/0.9.7a"},
{"content-length","59"},
{"content-type","text/html"},
{"x-powered-by","PHP/4.4.9"},
{"keep-alive","timeout=5, max=10"}],
"\nOnly accept POST:\nparam1 = 'This sentence contains & % @'\n"}}

That looks okay. Now let’s try some other characters:

1> inets:start().
ok
2> P2=edoc_lib:escape_uri("newline\ncharacter").
"newline%acharacter"
3> QueryString2="p2=" ++ P2.
"p2=newline%acharacter"
4> http:request(post, {"http://erlangexamples.com/examples/post.php", [], 
4> "application/x-www-form-urlencoded", QueryString2}, [], []).
{ok,{{"HTTP/1.1",200,"OK"},
[{"connection","Keep-Alive"},
{"date","Fri, 27 Feb 2009 15:51:49 GMT"},
{"server",
"Apache/1.3.41 (Unix) Sun-ONE-ASP/4.0.2 Resin/3.0.25 mod_fastcgi/2.4.6 mod_log_bytes/1.2 mod_bwlimited/1.4 mod_auth_passthrough/1.8 FrontPage/5.0.2.2635 mod_ssl/2.8.31 OpenSSL/0.9.7a"},
{"content-length","85"},
{"content-type","text/html"},
{"x-powered-by","PHP/4.4.9"},
{"keep-alive","timeout=5, max=10"}],
"\nOnly accept POST:\np2 = 'newline?haracter'\n"}}

You can see that edoc_lib:escape_uri/1 failed to escape the newline character correctly: it translated \n to %a, whereas the correct translation is %0a (a percent-escape must always have two hex digits).

5> QueryString3="p2=newline%0acharacter".
"p2=newline%0acharacter"
6> http:request(post, {"http://erlangexamples.com/examples/post.php", [], 
6> "application/x-www-form-urlencoded", QueryString3}, [], []).
{ok,{{"HTTP/1.1",200,"OK"},
[{"connection","Keep-Alive"},
{"date","Fri, 27 Feb 2009 15:54:55 GMT"},
{"server",
"Apache/1.3.41 (Unix) Sun-ONE-ASP/4.0.2 Resin/3.0.25 mod_fastcgi/2.4.6 mod_log_bytes/1.2 mod_bwlimited/1.4 mod_auth_passthrough/1.8 FrontPage/5.0.2.2635 mod_ssl/2.8.31 OpenSSL/0.9.7a"},
{"content-length","84"},
{"content-type","text/html"},
{"x-powered-by","PHP/4.4.9"},
{"keep-alive","timeout=5, max=10"}],
"\nOnly accept POST:\np2 = 'newline\ncharacter'\n"}}

I’ve made a minor modification to edoc_lib:escape_uri/1 to make it work:

%% Percent-encode a string (a list of character code points).
%%
%% ASCII letters, digits, `.', `-' and `_' are copied through unchanged.
%% Any other character up to 16#7f is handed to escape_byte/1 as a single
%% byte. Characters above 16#7f are first encoded as UTF-8 (2 to 4 bytes) and
%% every resulting byte is handed to escape_byte/1.
%%
%% Returns a flat string. A code point above 16#10ffff is not representable
%% in UTF-8 and makes the call fail with function_clause.
escape_uri([C | Cs]) when C >= $a, C =< $z;
                          C >= $A, C =< $Z;
                          C >= $0, C =< $9;
                          C =:= $.; C =:= $-; C =:= $_ ->
    [C | escape_uri(Cs)];
escape_uri([C | Cs]) when C > 16#7f ->
    %% The previous version kept only bits 6-7 for the lead byte, so anything
    %% above 16#ff was encoded incorrectly; encode the full code point instead.
    lists:append([escape_byte(B) || B <- utf8_bytes(C)]) ++ escape_uri(Cs);
escape_uri([C | Cs]) ->
    escape_byte(C) ++ escape_uri(Cs);
escape_uri([]) ->
    [].

%% UTF-8 byte sequence for a code point above 16#7f.
utf8_bytes(C) when C =< 16#7ff ->
    [(C bsr 6) bor 16#c0,
     (C band 16#3f) bor 16#80];
utf8_bytes(C) when C =< 16#ffff ->
    [(C bsr 12) bor 16#e0,
     ((C bsr 6) band 16#3f) bor 16#80,
     (C band 16#3f) bor 16#80];
utf8_bytes(C) when C =< 16#10ffff ->
    [(C bsr 18) bor 16#f0,
     ((C bsr 12) band 16#3f) bor 16#80,
     ((C bsr 6) band 16#3f) bor 16#80,
     (C band 16#3f) bor 16#80].

%% Percent-escape one byte value.
%% The hex digits come from hex_octet/1 (not defined in this snippet;
%% presumably it returns the digits without zero padding -- confirm) and
%% are passed to normalize/1, which adds the `%' prefix.
escape_byte(Byte) ->
    normalize(hex_octet(Byte)).

%% Turn a string of hex digits into a percent-escape.
%% A single digit is left-padded with `0' (e.g. "a" -> "%0a"); any other
%% input is only prefixed with `%' (e.g. "20" -> "%20").
%% Matching on the one-element list shape avoids walking the list with
%% length/1 in a guard.
normalize([Digit]) -> [$%, $0, Digit];
normalize(Digits) -> "%" ++ Digits.

HTML , , ,