parse.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. package main
  2. import (
  3. "errors"
  4. "fmt"
  5. "log"
  6. "net/http"
  7. "net/url"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/ChimeraCoder/anaconda"
  13. "github.com/Davincible/goinsta/v3"
  14. "github.com/PuerkitoBio/goquery"
  15. "github.com/bwmarrin/discordgo"
  16. )
  17. //#region Twitter
  18. func getTwitterUrls(inputURL string) (map[string]string, error) {
  19. parts := strings.Split(inputURL, ":")
  20. if len(parts) < 2 {
  21. return nil, errors.New("unable to parse Twitter URL")
  22. }
  23. return map[string]string{"https:" + parts[1] + ":orig": filenameFromURL(parts[1])}, nil
  24. }
  25. func getTwitterStatusUrls(inputURL string, m *discordgo.Message) (map[string]string, error) {
  26. if twitterClient == nil {
  27. return nil, errors.New("invalid Twitter API credentials")
  28. }
  29. if strings.Contains(inputURL, "/photo/") {
  30. inputURL = inputURL[:strings.Index(inputURL, "/photo/")]
  31. }
  32. if strings.Contains(inputURL, "/video/") {
  33. inputURL = inputURL[:strings.Index(inputURL, "/video/")]
  34. }
  35. matches := regexUrlTwitterStatus.FindStringSubmatch(inputURL)
  36. statusId, err := strconv.ParseInt(matches[4], 10, 64)
  37. if err != nil {
  38. return nil, err
  39. }
  40. tweet, err := twitterClient.GetTweet(statusId, nil)
  41. if err != nil {
  42. return nil, err
  43. }
  44. links := make(map[string]string)
  45. for _, tweetMedia := range tweet.ExtendedEntities.Media {
  46. if len(tweetMedia.VideoInfo.Variants) > 0 {
  47. var lastVideoVariant anaconda.Variant
  48. for _, videoVariant := range tweetMedia.VideoInfo.Variants {
  49. if videoVariant.Bitrate >= lastVideoVariant.Bitrate {
  50. lastVideoVariant = videoVariant
  51. }
  52. }
  53. if lastVideoVariant.Url != "" {
  54. links[lastVideoVariant.Url] = ""
  55. }
  56. } else {
  57. foundUrls := getDownloadLinks(tweetMedia.Media_url_https, m)
  58. for foundUrlKey, foundUrlValue := range foundUrls {
  59. links[foundUrlKey] = foundUrlValue
  60. }
  61. }
  62. }
  63. for _, tweetUrl := range tweet.Entities.Urls {
  64. foundUrls := getDownloadLinks(tweetUrl.Expanded_url, m)
  65. for foundUrlKey, foundUrlValue := range foundUrls {
  66. links[foundUrlKey] = foundUrlValue
  67. }
  68. }
  69. return links, nil
  70. }
  71. //#endregion
  72. //#region Instagram
  73. func getInstagramUrls(inputURL string, m *discordgo.Message) (map[string]string, error) {
  74. if instagramClient == nil {
  75. return nil, errors.New("invalid Instagram API credentials")
  76. }
  77. links := make(map[string]string)
  78. // fix
  79. shortcode := inputURL
  80. if strings.Contains(shortcode, ".com/p/") {
  81. shortcode = shortcode[strings.Index(shortcode, ".com/p/")+7:]
  82. }
  83. if strings.Contains(shortcode, ".com/reel/") {
  84. shortcode = shortcode[strings.Index(shortcode, ".com/reel/")+10:]
  85. }
  86. shortcode = strings.ReplaceAll(shortcode, "/", "")
  87. // fetch
  88. mediaID, err := goinsta.MediaIDFromShortID(shortcode)
  89. if err == nil {
  90. media, err := instagramClient.GetMedia(mediaID)
  91. if err != nil {
  92. return nil, err
  93. } else {
  94. postType := media.Items[0].MediaToString()
  95. if postType == "carousel" {
  96. for index, item := range media.Items[0].CarouselMedia {
  97. itemType := item.MediaToString()
  98. if itemType == "video" {
  99. url := item.Videos[0].URL
  100. links[url] = fmt.Sprintf("%s %d %s", shortcode, index, media.Items[0].User.Username)
  101. } else if itemType == "photo" {
  102. url := item.Images.GetBest()
  103. links[url] = fmt.Sprintf("%s %d %s", shortcode, index, media.Items[0].User.Username)
  104. }
  105. }
  106. } else if postType == "video" {
  107. url := media.Items[0].Videos[0].URL
  108. links[url] = fmt.Sprintf("%s %s", shortcode, media.Items[0].User.Username)
  109. } else if postType == "photo" {
  110. url := media.Items[0].Images.GetBest()
  111. links[url] = fmt.Sprintf("%s %s", shortcode, media.Items[0].User.Username)
  112. }
  113. }
  114. }
  115. return links, nil
  116. }
  117. //#endregion
  118. //#region Imgur
  119. func getImgurSingleUrls(url string) (map[string]string, error) {
  120. url = regexp.MustCompile(`(r\/[^\/]+\/)`).ReplaceAllString(url, "") // remove subreddit url
  121. url = strings.Replace(url, "imgur.com/", "imgur.com/download/", -1)
  122. url = strings.Replace(url, ".gifv", "", -1)
  123. return map[string]string{url: ""}, nil
  124. }
// imgurAlbumObject models the subset of the Imgur album-images API
// response used here: the direct link of each image in the album.
type imgurAlbumObject struct {
	Data []struct {
		Link string // direct URL to the image file
	}
}
  130. func getImgurAlbumUrls(url string) (map[string]string, error) {
  131. url = regexp.MustCompile(`(#[A-Za-z0-9]+)?$`).ReplaceAllString(url, "") // remove anchor
  132. afterLastSlash := strings.LastIndex(url, "/")
  133. albumId := url[afterLastSlash+1:]
  134. headers := make(map[string]string)
  135. headers["Authorization"] = "Client-ID " + imgurClientID
  136. imgurAlbumObject := new(imgurAlbumObject)
  137. getJSONwithHeaders("https://api.imgur.com/3/album/"+albumId+"/images", imgurAlbumObject, headers)
  138. links := make(map[string]string)
  139. for _, v := range imgurAlbumObject.Data {
  140. links[v.Link] = ""
  141. }
  142. if len(links) <= 0 {
  143. return getImgurSingleUrls(url)
  144. }
  145. log.Printf("Found imgur album with %d images (url: %s)\n", len(links), url)
  146. return links, nil
  147. }
  148. //#endregion
  149. //#region Streamable
// streamableObject models the Streamable video API response. Only Status
// and Files.Mp4.URL are consulted by getStreamableUrls; the remaining
// fields mirror the API payload.
type streamableObject struct {
	Status int    `json:"status"` // processing state; 2 is treated as ready by getStreamableUrls
	Title  string `json:"title"`
	Files  struct {
		Mp4 struct {
			URL    string `json:"url"` // may be protocol-relative (no scheme)
			Width  int    `json:"width"`
			Height int    `json:"height"`
		} `json:"mp4"`
		Mp4Mobile struct {
			URL    string `json:"url"`
			Width  int    `json:"width"`
			Height int    `json:"height"`
		} `json:"mp4-mobile"`
	} `json:"files"`
	URL          string      `json:"url"`
	ThumbnailURL string      `json:"thumbnail_url"`
	Message      interface{} `json:"message"`
}
  169. func getStreamableUrls(url string) (map[string]string, error) {
  170. matches := regexUrlStreamable.FindStringSubmatch(url)
  171. shortcode := matches[3]
  172. if shortcode == "" {
  173. return nil, errors.New("unable to get shortcode from URL")
  174. }
  175. reqUrl := fmt.Sprintf("https://api.streamable.com/videos/%s", shortcode)
  176. streamable := new(streamableObject)
  177. getJSON(reqUrl, streamable)
  178. if streamable.Status != 2 || streamable.Files.Mp4.URL == "" {
  179. return nil, errors.New("streamable object has no download candidate")
  180. }
  181. link := streamable.Files.Mp4.URL
  182. if !strings.HasPrefix(link, "http") {
  183. link = "https:" + link
  184. }
  185. links := make(map[string]string)
  186. links[link] = ""
  187. return links, nil
  188. }
  189. //#endregion
  190. //#region Gfycat
// gfycatObject models the subset of the Gfycat API response used here:
// the mp4 URL of the requested gfy.
type gfycatObject struct {
	GfyItem struct {
		Mp4URL string `json:"mp4Url"` // direct mp4 download URL
	} `json:"gfyItem"`
}
  196. func getGfycatUrls(url string) (map[string]string, error) {
  197. parts := strings.Split(url, "/")
  198. if len(parts) < 3 {
  199. return nil, errors.New("unable to parse Gfycat URL")
  200. }
  201. gfycatId := parts[len(parts)-1]
  202. gfycatObject := new(gfycatObject)
  203. getJSON("https://api.gfycat.com/v1/gfycats/"+gfycatId, gfycatObject)
  204. gfycatUrl := gfycatObject.GfyItem.Mp4URL
  205. if url == "" {
  206. return nil, errors.New("failed to read response from Gfycat")
  207. }
  208. return map[string]string{gfycatUrl: ""}, nil
  209. }
  210. //#endregion
  211. //#region Flickr
// flickrPhotoSizeObject is one entry of the Flickr getSizes response:
// a single available rendition of a photo.
type flickrPhotoSizeObject struct {
	Label  string `json:"label"` // size name, e.g. "Original"; empty label is used as "unset" sentinel
	Width  int    `json:"width"`
	Height int    `json:"height"`
	Source string `json:"source"` // direct image URL for this size
	URL    string `json:"url"`
	Media  string `json:"media"`
}
// flickrPhotoObject models the Flickr flickr.photos.getSizes response:
// permission flags plus the list of available sizes.
type flickrPhotoObject struct {
	Sizes struct {
		Canblog     int                     `json:"canblog"`
		Canprint    int                     `json:"canprint"`
		Candownload int                     `json:"candownload"`
		Size        []flickrPhotoSizeObject `json:"size"` // available renditions
	} `json:"sizes"`
	Stat string `json:"stat"`
}
  229. func getFlickrUrlFromPhotoId(photoId string) string {
  230. reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photo_id=%s",
  231. "flickr.photos.getSizes", config.Credentials.FlickrApiKey, photoId)
  232. flickrPhoto := new(flickrPhotoObject)
  233. getJSON(reqUrl, flickrPhoto)
  234. var bestSize flickrPhotoSizeObject
  235. for _, size := range flickrPhoto.Sizes.Size {
  236. if bestSize.Label == "" {
  237. bestSize = size
  238. } else {
  239. if size.Width > bestSize.Width || size.Height > bestSize.Height {
  240. bestSize = size
  241. }
  242. }
  243. }
  244. return bestSize.Source
  245. }
  246. func getFlickrPhotoUrls(url string) (map[string]string, error) {
  247. if config.Credentials.FlickrApiKey == "" {
  248. return nil, errors.New("invalid Flickr API Key Set")
  249. }
  250. matches := regexUrlFlickrPhoto.FindStringSubmatch(url)
  251. photoId := matches[5]
  252. if photoId == "" {
  253. return nil, errors.New("unable to get Photo ID from URL")
  254. }
  255. return map[string]string{getFlickrUrlFromPhotoId(photoId): ""}, nil
  256. }
// flickrAlbumObject models the Flickr flickr.photosets.getPhotos
// response. Only Photoset.Photo[].ID is consumed by getFlickrAlbumUrls;
// the remaining fields mirror the API payload.
type flickrAlbumObject struct {
	Photoset struct {
		ID        string `json:"id"`
		Primary   string `json:"primary"`
		Owner     string `json:"owner"`
		Ownername string `json:"ownername"`
		Photo     []struct {
			ID        string `json:"id"` // photo ID, resolved via getFlickrUrlFromPhotoId
			Secret    string `json:"secret"`
			Server    string `json:"server"`
			Farm      int    `json:"farm"`
			Title     string `json:"title"`
			Isprimary string `json:"isprimary"`
			Ispublic  int    `json:"ispublic"`
			Isfriend  int    `json:"isfriend"`
			Isfamily  int    `json:"isfamily"`
		} `json:"photo"`
		Page    int    `json:"page"`
		PerPage int    `json:"per_page"`
		Perpage int    `json:"perpage"`
		Pages   int    `json:"pages"`
		Total   string `json:"total"`
		Title   string `json:"title"`
	} `json:"photoset"`
	Stat string `json:"stat"`
}
  283. func getFlickrAlbumUrls(url string) (map[string]string, error) {
  284. if config.Credentials.FlickrApiKey == "" {
  285. return nil, errors.New("invalid Flickr API Key Set")
  286. }
  287. matches := regexUrlFlickrAlbum.FindStringSubmatch(url)
  288. if len(matches) < 10 || matches[9] == "" {
  289. return nil, errors.New("unable to find Flickr Album ID in URL")
  290. }
  291. albumId := matches[9]
  292. if albumId == "" {
  293. return nil, errors.New("unable to get Album ID from URL")
  294. }
  295. reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photoset_id=%s&per_page=500",
  296. "flickr.photosets.getPhotos", config.Credentials.FlickrApiKey, albumId)
  297. flickrAlbum := new(flickrAlbumObject)
  298. getJSON(reqUrl, flickrAlbum)
  299. links := make(map[string]string)
  300. for _, photo := range flickrAlbum.Photoset.Photo {
  301. links[getFlickrUrlFromPhotoId(photo.ID)] = ""
  302. }
  303. return links, nil
  304. }
  305. func getFlickrAlbumShortUrls(url string) (map[string]string, error) {
  306. result, err := http.Get(url)
  307. if err != nil {
  308. return nil, errors.New("Error getting long URL from shortened Flickr Album URL: " + err.Error())
  309. }
  310. if regexUrlFlickrAlbum.MatchString(result.Request.URL.String()) {
  311. return getFlickrAlbumUrls(result.Request.URL.String())
  312. }
  313. return nil, errors.New("encountered invalid URL while trying to get long URL from short Flickr Album URL")
  314. }
  315. //#endregion
  316. //#region Tistory
  317. // getTistoryUrls downloads tistory URLs
  318. // http://t1.daumcdn.net/cfile/tistory/[…] => http://t1.daumcdn.net/cfile/tistory/[…]
  319. // http://t1.daumcdn.net/cfile/tistory/[…]?original => as is
  320. func getTistoryUrls(link string) (map[string]string, error) {
  321. if !strings.HasSuffix(link, "?original") {
  322. link += "?original"
  323. }
  324. return map[string]string{link: ""}, nil
  325. }
  326. func getLegacyTistoryUrls(link string) (map[string]string, error) {
  327. link = strings.Replace(link, "/image/", "/original/", -1)
  328. return map[string]string{link: ""}, nil
  329. }
  330. func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
  331. parameters, _ := url.ParseQuery(urlI)
  332. if val, ok := parameters["fname"]; ok {
  333. if len(val) > 0 {
  334. if regexUrlTistoryLegacy.MatchString(val[0]) {
  335. return getLegacyTistoryUrls(val[0])
  336. }
  337. }
  338. }
  339. return nil, nil
  340. }
  341. func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
  342. client := new(http.Client)
  343. request, err := http.NewRequest("HEAD", url, nil)
  344. if err != nil {
  345. return nil, err
  346. }
  347. request.Header.Add("Accept-Encoding", "identity")
  348. request.Header.Add("User-Agent", sneakyUserAgent)
  349. respHead, err := client.Do(request)
  350. if err != nil {
  351. return nil, err
  352. }
  353. contentType := ""
  354. for headerKey, headerValue := range respHead.Header {
  355. if headerKey == "Content-Type" {
  356. contentType = headerValue[0]
  357. }
  358. }
  359. if !strings.Contains(contentType, "text/html") {
  360. return nil, nil
  361. }
  362. request, err = http.NewRequest("GET", url, nil)
  363. if err != nil {
  364. return nil, err
  365. }
  366. request.Header.Add("Accept-Encoding", "identity")
  367. request.Header.Add("User-Agent", sneakyUserAgent)
  368. resp, err := client.Do(request)
  369. if err != nil {
  370. return nil, err
  371. }
  372. doc, err := goquery.NewDocumentFromResponse(resp)
  373. if err != nil {
  374. return nil, err
  375. }
  376. var links = make(map[string]string)
  377. doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) {
  378. foundUrl, exists := s.Attr("src")
  379. if exists {
  380. if regexUrlTistoryLegacyWithCDN.MatchString(foundUrl) {
  381. finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl)
  382. if len(finalTistoryUrls) > 0 {
  383. for finalTistoryUrl := range finalTistoryUrls {
  384. foundFilename := s.AttrOr("filename", "")
  385. links[finalTistoryUrl] = foundFilename
  386. }
  387. }
  388. } else if regexUrlTistoryLegacy.MatchString(foundUrl) {
  389. finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl)
  390. if len(finalTistoryUrls) > 0 {
  391. for finalTistoryUrl := range finalTistoryUrls {
  392. foundFilename := s.AttrOr("filename", "")
  393. links[finalTistoryUrl] = foundFilename
  394. }
  395. }
  396. }
  397. }
  398. })
  399. if len(links) > 0 {
  400. log.Printf("[%s] Found tistory album with %d images (url: %s)\n", time.Now().Format(time.Stamp), len(links), url)
  401. }
  402. return links, nil
  403. }
  404. //#endregion
  405. //#region Reddit
  406. // This is very crude but works for now
// This is very crude but works for now
// redditThreadObject models a reddit thread ".json" response: a list of
// listings whose children are left untyped and asserted at use site.
type redditThreadObject []struct {
	Kind string `json:"kind"`
	Data struct {
		Children interface{} `json:"children"` // weakly typed; see getRedditPostUrls
	} `json:"data"`
}
  413. func getRedditPostUrls(link string) (map[string]string, error) {
  414. if strings.Contains(link, "?") {
  415. link = link[:strings.Index(link, "?")]
  416. }
  417. redditThread := new(redditThreadObject)
  418. headers := make(map[string]string)
  419. headers["Accept-Encoding"] = "identity"
  420. headers["User-Agent"] = sneakyUserAgent
  421. err := getJSONwithHeaders(link+".json", redditThread, headers)
  422. if err != nil {
  423. return nil, fmt.Errorf("failed to parse json from reddit post:\t%s", err)
  424. }
  425. redditPost := (*redditThread)[0].Data.Children.([]interface{})[0].(map[string]interface{})
  426. redditPostData := redditPost["data"].(map[string]interface{})
  427. if redditPostData["url_overridden_by_dest"] != nil {
  428. redditLink := redditPostData["url_overridden_by_dest"].(string)
  429. filename := fmt.Sprintf("Reddit-%s_%s %s", redditPostData["subreddit"].(string), redditPostData["id"].(string), filenameFromURL(redditLink))
  430. return map[string]string{redditLink: filename}, nil
  431. }
  432. return nil, nil
  433. }
  434. //#endregion