vrobot3 is a chat bot for IRC and Discord.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

330 lines
10KB

  1. //-----------------------------------------------------------------------------
  2. //
  3. // Copyright © 2016 Project Golan
  4. //
  5. // See "LICENSE" for more information.
  6. //
  7. //-----------------------------------------------------------------------------
  8. //
  9. // Link expansion.
  10. //
  11. //-----------------------------------------------------------------------------
  12. using System;
  13. using System.Text.RegularExpressions;
  14. using System.Xml;
  15. using System.Linq;
  16. using System.Net;
  17. using System.Collections.Generic;
  18. using System.Threading;
  19. using Sharkbite.Irc;
  20. using HtmlAgilityPack;
  21. using Newtonsoft.Json;
  22. using Newtonsoft.Json.Linq;
  23. namespace ProjectGolan.Vrobot3
  24. {
  25. //
  26. // Mod_Links
  27. //
  28. public class Mod_Links : IBotModule
  29. {
  30. //
  31. // URI
  32. //
  // The pieces of one link found in a message. TryParseURIs fills these in
  // from the link regex (or from the final response URI after a redirect);
  // `uri` always holds the complete link text.
  //
  private struct URI
  {
      public String method;
      public String host;
      public String path;
      public String query;
      public String tag;
      public String uri;
  }

  // Signature shared by every title handler. A handler receives the parsed
  // link and the referer to send, and writes the title it found into
  // `result`; leaving `result` untouched means "no title".
  //
  private delegate void URIHandler(URI uri, String referer, ref String result);
  38. //
  39. // Mod_Links constructor
  40. //
  // Creates the module for the given bot: subscribes Evt_OnMessage to the
  // message event and then calls postSetup().
  //
  // bot_ - the bot this module belongs to; forwarded to the base constructor.
  //
  public Mod_Links(Bot bot_) : base(bot_)
  {
      events.OnMessage += Evt_OnMessage;

      postSetup();
  }
  47. //
  48. // Evt_OnMessage
  49. //
  // Message event handler: expands links found in ordinary (non-command)
  // messages.
  //
  // usr     - sender of the message (not used here).
  // channel - where the message was seen; titles are sent back there.
  // msg     - the message text to scan for links.
  // iscmd   - true when the message is a bot command; those are skipped.
  //
  public void Evt_OnMessage(UserInfo usr, String channel, String msg, bool iscmd)
  {
      // Commands never get link expansion, so don't spawn a thread for them.
      if(iscmd)
          return;

      // Do this asynchronously, we don't want link parsing to block operation.
      new Thread(() => {
          try
          {
              TryParseURIs(channel, msg);
          }
          catch(Exception exc)
          {
              Console.WriteLine("{0}: URL thread error: {1}", bot.n_groupname, exc.Message);
          }
      }).Start();
  }
  65. //
  66. // GetURITitle
  67. //
  // Downloads the start of a page and looks for its <title> element.
  //
  // uri     - the link to fetch (uri.uri is the address requested).
  // referer - referer passed on to Utils.GetResponseString.
  // kb      - how many kilobytes of the response to read at most.
  //
  // Returns the regex match (check Success; the text is in the "realtitle"
  // group), or null when Utils.GetResponseString returned null.
  //
  private Match GetURITitle(URI uri, String referer, int kb = 16)
  {
      String rstr = Utils.GetResponseString(uri.uri, 1024 * kb, referer);
      if(rstr == null)
          return null;

      // IgnoreCase: markup is not always lower-case (<TITLE>).
      // Singleline: lets '.' cross line breaks so multi-line titles match.
      // The optional attribute part accepts <title id="...">.
      // The static Regex.Match reuses the parsed pattern between calls.
      return Regex.Match(
          rstr,
          @"<title(?:\s[^>]*)?>(?<realtitle>.+?)</title>",
          RegexOptions.IgnoreCase | RegexOptions.Singleline);
  }
  75. //
  76. // URI_Default
  77. //
  // Generic title handler: fetches the first 16kb of the page and uses the
  // first non-empty <title>, or <meta> whose id ends in "title".
  //
  // uri     - the link to fetch.
  // referer - value for the Referer header (may be null).
  // result  - receives the decoded title; left untouched when none is found.
  //
  // Network and parse errors are not caught here; the caller handles them.
  //
  private void URI_Default(URI uri, String referer, ref String result)
  {
      // WebRequest.Create only produces an HttpWebRequest for http/https.
      // For other schemes (ftp, ftps) there is no HTML title to fetch, so
      // bail out instead of dereferencing null.
      var req = WebRequest.Create(uri.uri) as HttpWebRequest;
      if(req == null)
          return;

      req.Referer = referer;
      req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.9) Gecko/20100101 Firefox/31.9";

      using(var response = req.GetResponse() as HttpWebResponse)
      {
          String body = Utils.GetResponseString(response, 16*1024);
          if(String.IsNullOrEmpty(body))
              return;

          var html = new HtmlDocument();
          html.LoadHtml(body);

          char[] padding = { ' ', '\t', '\n' };

          // Single pass in document order; stop at the first usable title.
          foreach(var item in html.DocumentNode.Descendants())
          {
              String name = item?.Name ?? String.Empty;
              String text;

              if(name == "title")
                  text = item.InnerText;
              else if(name == "meta" &&
                      (item.Attributes["id"]?.Value ?? String.Empty).EndsWith("title", StringComparison.Ordinal))
                  // A meta element carries its text in the content attribute,
                  // so prefer that and only fall back to the inner text.
                  text = item.Attributes["content"]?.Value ?? item.InnerText;
              else
                  continue;

              text = WebUtility.HtmlDecode((text ?? String.Empty).Trim(padding)) ?? String.Empty;

              // An empty candidate must not hide a real title further down.
              if(text.Length == 0)
                  continue;

              result = text;
              return;
          }
      }
  }
  96. //
  97. // URI_Youtube
  98. //
  99. // Special fucking snowflake.
  100. //
  // Title handler for YouTube links: loads the whole page and reads the
  // element whose id is "eow-title".
  //
  // uri     - the link to fetch.
  // referer - value for the Referer header.
  // result  - receives "<title> - YouTube"; left untouched when the element
  //           is not present.
  //
  private void URI_Youtube(URI uri, String referer, ref String result)
  {
      var request = WebRequest.Create(uri.uri) as HttpWebRequest;
      request.Referer = referer;

      using(var response = request.GetResponse() as HttpWebResponse)
      {
          var document = new HtmlDocument();
          document.Load(response.GetResponseStream());

          var titlenode = document.DocumentNode
              .Descendants()
              .FirstOrDefault(node => (node?.Attributes["id"]?.Value ?? String.Empty) == "eow-title");

          if(titlenode == null)
              return;

          char[] padding = { ' ', '\t', '\n' };
          String title = titlenode.InnerText.Trim(padding);

          result = WebUtility.HtmlDecode(title) + " - YouTube";
      }
  }
  117. //
  118. // URI_Gelbooru
  119. //
  // Title handler for Gelbooru links. Uses the page's <title>, except that
  // any title containing "Image View" is replaced by a fixed short form.
  //
  // uri     - the link to fetch.
  // referer - referer passed on to GetURITitle.
  // result  - receives the title; left untouched when no title was matched.
  //
  private void URI_Gelbooru(URI uri, String referer, ref String result)
  {
      // Should be OK to just get the first 8kb here.
      Match found = GetURITitle(uri, referer, 8);

      if(found == null || !found.Success)
          return;

      String decoded = WebUtility.HtmlDecode(found.Groups["realtitle"].Value);

      result = decoded.Contains("Image View")
          ? "Image View - Gelbooru"
          : decoded;
  }
  132. //
  133. // URI_Hitbox
  134. //
  // Title handler for hitbox links: asks the hitbox API about the channel
  // named by the link's path and builds
  // "<display name>[ (live)][: <status>] - hitbox".
  //
  // uri     - the link; uri.path names the channel.
  // referer - value for the Referer header.
  // result  - receives the summary; left untouched when the API response has
  //           no usable livestream entry.
  //
  // Network and JSON errors are not caught here; the caller handles them.
  //
  private void URI_Hitbox(URI uri, String referer, ref String result)
  {
      String channelpath = uri.path.Trim('/');

      // No channel in the link (e.g. the site root): there is nothing to look
      // up, so treat it like any other page.
      if(channelpath.Length == 0)
      {
          URI_Default(uri, referer, ref result);
          return;
      }

      // The name goes into a URL path, so it needs URL escaping (not HTML
      // encoding). Unescape first so already-escaped input isn't escaped
      // twice, and work per segment so '/' separators survive.
      String name = String.Join("/",
          channelpath.Split('/').Select(seg =>
              System.Uri.EscapeDataString(System.Uri.UnescapeDataString(seg))));

      var req = WebRequest.Create("https://api.hitbox.tv/media/live/" + name + "?fast") as HttpWebRequest;
      req.Referer = referer;

      using(var response = req.GetResponse() as HttpWebResponse)
      {
          var json = JObject.Parse(Utils.GetResponseString(response, 64 * 1024));

          // Missing or empty "livestream" array: nothing to report.
          var node = (json["livestream"] as JArray)?.FirstOrDefault();
          if(node == null)
              return;

          String displayname = (String)node["media_display_name"];
          if(String.IsNullOrEmpty(displayname))
              return;

          String status = (String)node["media_status"];

          // Anything that isn't the number 1 (missing, malformed) is "not live".
          int liveflag;
          bool live = Int32.TryParse(
              (String)node["media_is_live"],
              System.Globalization.NumberStyles.Integer,
              System.Globalization.CultureInfo.InvariantCulture,
              out liveflag) && liveflag == 1;

          result = displayname;
          if(live)
              result += " (live)";
          if(!String.IsNullOrEmpty(status))
              result += ": " + status;
          result += " - hitbox";
      }
  }
  155. //
  156. // TryParseURIs
  157. //
  158. // This function is really complicated because of exploits. Fuck exploits.
  159. //
  // Matches anything shaped like method://host/path?query#tag. A space ends
  // every component (including the tag), so text after a link is never
  // swallowed into it.
  private static readonly Regex r_finduris = new Regex(
      @"((?<method>[^:/?# ]+):)" +
      @"(//(?<host>[^/?# ]*))" +
      @"(?<path>[^?# ]*)" +
      @"(?<query>\?([^# ]*))?" +
      @"(?<tag>#([^ ]*))?"
  );

  // Make sure the method is OK.
  // Previously:
  // [22:19] <marrub> file:///srv/www/marrub/oldmen.html
  // [22:19] <vrobot3> [ OLD MEN OLD MEN OLD MEN OLD MEN OLD MEN OLD MEN OLD MEN OLD ... ]
  private static readonly String[] validmethods = { "ftp", "ftps", "http", "https" };

  // Longest output (before the surrounding brackets) sent for one message.
  private const int maxoutput = 400;

  // Finds every link in a message, fetches a title for each one and sends
  // the titles to the channel as "[ title | title ]".
  //
  // channel - where to send the result.
  // msg     - the message text to scan.
  //
  // Never throws: a failure on one link is logged and that link skipped;
  // anything else is logged as a parse error.
  //
  private void TryParseURIs(String channel, String msg)
  {
      try
      {
          String outp = String.Empty;

          foreach(Match match in r_finduris.Matches(msg))
          {
              String result = String.Empty;

              // Everything that touches the network is per-link, so one dead
              // or malformed link can't discard the titles of the others.
              try
              {
                  URI uri = ResolveURI(match);

                  if(!validmethods.Contains(uri.method))
                      continue;

                  String referer;
                  URIHandler handler = SelectHandler(uri, out referer);
                  handler(uri, referer, ref result);
              }
              catch(Exception exc)
              {
                  Console.WriteLine("URL handle exception: {0}", exc.Message);
                  continue;
              }

              // If the result is 0-length, just get rid of it.
              result = SanitizeTitle(result);
              if(result.Length == 0)
                  continue;

              // Separate from the previous title. Doing it here (rather than
              // after each title) never leaves a dangling separator when
              // later links are skipped.
              if(outp.Length > 0)
                  outp += " | ";
              outp += result;

              // If the output is too long, we need to shorten it and break.
              if(outp.Length > maxoutput - 3)
              {
                  outp = outp.Substring(0, maxoutput - 3) + "···";
                  break;
              }
          }

          if(outp.Length > 0)
              bot.Message(channel, "[ " + outp + " ]");
      }
      catch(Exception exc)
      {
          Console.WriteLine("{0}: URL parse error: {1}", bot.n_groupname, exc.Message ?? "[unknown]");
      }
  }

  // Builds a URI from a regex match; for http/https links the request is
  // actually made so redirects to another host are reflected in the result.
  private URI ResolveURI(Match match)
  {
      URI uri = new URI{
          method = match.Groups["method"].Value,
          host   = match.Groups["host"].Value,
          path   = match.Groups["path"].Value,
          query  = match.Groups["query"].Value,
          tag    = match.Groups["tag"].Value,
          uri    = match.Value
      };

      // Will the real URI please stand up?
      if(uri.method == "http" || uri.method == "https")
      {
          using(var resp = WebRequest.Create(uri.uri).GetResponse())
          {
              Uri real = resp.ResponseUri;
              if(real.Host != uri.host)
              {
                  uri.method = real.Scheme;
                  uri.host   = real.Host;
                  uri.path   = real.AbsolutePath;
                  uri.query  = real.Query;
                  uri.tag    = real.Fragment;
                  uri.uri    = real.OriginalString;
              }
          }
      }

      if(uri.path.Length == 0)
          uri.path = "/";

      return uri;
  }

  // Picks the title handler for a link and the referer to send with it.
  // Only http/https links get a referer or a site-specific handler.
  private URIHandler SelectHandler(URI uri, out String referer)
  {
      referer = null;

      if(uri.method != "http" && uri.method != "https")
          return URI_Default;

      referer = uri.method + "://" + uri.host;

      // Host names are case-insensitive; ignore a leading "www.".
      String hostst = uri.host.ToLowerInvariant();
      if(hostst.StartsWith("www.", StringComparison.Ordinal))
          hostst = hostst.Substring(4);

      switch(hostst)
      {
      case "youtube.com":
      case "youtu.be":
          return URI_Youtube;
      case "gelbooru.com":
          return URI_Gelbooru;
      case "hitbox.tv":
          return URI_Hitbox;
      default:
          return URI_Default;
      }
  }

  // Strips everything that must not reach the channel: whitespace other than
  // a plain space, control characters and surrogates. The remainder is
  // trimmed; null is treated as empty.
  private static String SanitizeTitle(String title)
  {
      if(String.IsNullOrEmpty(title))
          return String.Empty;

      var kept = title.Where(ch =>
          !((Char.IsWhiteSpace(ch) && ch != ' ') || Char.IsControl(ch) || Char.IsSurrogate(ch)));

      return new String(kept.ToArray()).Trim();
  }
  277. }
  278. }