parse.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. package main
  2. import (
  3. "errors"
  4. "fmt"
  5. "log"
  6. "net/http"
  7. "net/url"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/Davincible/goinsta/v3"
  13. "github.com/PuerkitoBio/goquery"
  14. "github.com/bwmarrin/discordgo"
  15. )
  16. //#region Twitter
  17. func getTwitterUrls(inputURL string) (map[string]string, error) {
  18. parts := strings.Split(inputURL, ":")
  19. if len(parts) < 2 {
  20. return nil, errors.New("unable to parse Twitter URL")
  21. }
  22. return map[string]string{"https:" + parts[1] + ":orig": filenameFromURL(parts[1])}, nil
  23. }
  24. func getTwitterStatusUrls(inputURL string, m *discordgo.Message) (map[string]string, error) {
  25. if strings.Contains(inputURL, "/photo/") {
  26. inputURL = inputURL[:strings.Index(inputURL, "/photo/")]
  27. }
  28. if strings.Contains(inputURL, "/video/") {
  29. inputURL = inputURL[:strings.Index(inputURL, "/video/")]
  30. }
  31. matches := regexUrlTwitterStatus.FindStringSubmatch(inputURL)
  32. _, err := strconv.ParseInt(matches[4], 10, 64)
  33. if err != nil {
  34. return nil, err
  35. }
  36. tweet, err := twitterScraper.GetTweet(matches[4])
  37. if err != nil {
  38. return nil, err
  39. }
  40. links := make(map[string]string)
  41. for _, photo := range tweet.Photos {
  42. foundUrls := getDownloadLinks(photo.URL, m)
  43. for foundUrlKey, foundUrlValue := range foundUrls {
  44. links[foundUrlKey] = foundUrlValue
  45. }
  46. }
  47. for _, video := range tweet.Videos {
  48. foundUrls := getDownloadLinks(video.URL, m)
  49. for foundUrlKey, foundUrlValue := range foundUrls {
  50. links[foundUrlKey] = foundUrlValue
  51. }
  52. }
  53. return links, nil
  54. }
  55. //#endregion
  56. //#region Instagram
  57. func getInstagramUrls(inputURL string, m *discordgo.Message) (map[string]string, error) {
  58. if instagramClient == nil {
  59. return nil, errors.New("invalid Instagram API credentials")
  60. }
  61. links := make(map[string]string)
  62. // fix
  63. shortcode := inputURL
  64. if strings.Contains(shortcode, ".com/p/") {
  65. shortcode = shortcode[strings.Index(shortcode, ".com/p/")+7:]
  66. }
  67. if strings.Contains(shortcode, ".com/reel/") {
  68. shortcode = shortcode[strings.Index(shortcode, ".com/reel/")+10:]
  69. }
  70. shortcode = strings.ReplaceAll(shortcode, "/", "")
  71. // fetch
  72. mediaID, err := goinsta.MediaIDFromShortID(shortcode)
  73. if err == nil {
  74. media, err := instagramClient.GetMedia(mediaID)
  75. if err != nil {
  76. return nil, err
  77. } else {
  78. postType := media.Items[0].MediaToString()
  79. if postType == "carousel" {
  80. for index, item := range media.Items[0].CarouselMedia {
  81. itemType := item.MediaToString()
  82. if itemType == "video" {
  83. url := item.Videos[0].URL
  84. links[url] = fmt.Sprintf("%s %d %s", shortcode, index, media.Items[0].User.Username)
  85. } else if itemType == "photo" {
  86. url := item.Images.GetBest()
  87. links[url] = fmt.Sprintf("%s %d %s", shortcode, index, media.Items[0].User.Username)
  88. }
  89. }
  90. } else if postType == "video" {
  91. url := media.Items[0].Videos[0].URL
  92. links[url] = fmt.Sprintf("%s %s", shortcode, media.Items[0].User.Username)
  93. } else if postType == "photo" {
  94. url := media.Items[0].Images.GetBest()
  95. links[url] = fmt.Sprintf("%s %s", shortcode, media.Items[0].User.Username)
  96. }
  97. }
  98. }
  99. return links, nil
  100. }
  101. //#endregion
  102. //#region Imgur
  // regexImgurSubredditSegment matches an "r/<subreddit>/" path segment. It is
  // compiled once at package scope instead of on every call.
  var regexImgurSubredditSegment = regexp.MustCompile(`(r\/[^\/]+\/)`)

  // getImgurSingleUrls rewrites a single-image Imgur URL into its direct
  // download form: any "r/<subreddit>/" segment is removed, "imgur.com/" becomes
  // "imgur.com/download/", and ".gifv" is stripped. The resulting URL is
  // returned as the only key, with an empty filename. The error is always nil.
  func getImgurSingleUrls(url string) (map[string]string, error) {
  	url = regexImgurSubredditSegment.ReplaceAllString(url, "") // remove subreddit url
  	url = strings.ReplaceAll(url, "imgur.com/", "imgur.com/download/")
  	url = strings.ReplaceAll(url, ".gifv", "")
  	return map[string]string{url: ""}, nil
  }
  // imgurAlbumObject is the decode target getImgurAlbumUrls passes to
  // getJSONwithHeaders for the Imgur "album images" API response. Only the
  // per-image link is kept.
  type imgurAlbumObject struct {
  	// Data holds one entry per image; Link is used as the download URL.
  	Data []struct{ Link string }
  }
  114. func getImgurAlbumUrls(url string) (map[string]string, error) {
  115. url = regexp.MustCompile(`(#[A-Za-z0-9]+)?$`).ReplaceAllString(url, "") // remove anchor
  116. afterLastSlash := strings.LastIndex(url, "/")
  117. albumId := url[afterLastSlash+1:]
  118. headers := make(map[string]string)
  119. headers["Authorization"] = "Client-ID " + imgurClientID
  120. imgurAlbumObject := new(imgurAlbumObject)
  121. getJSONwithHeaders("https://api.imgur.com/3/album/"+albumId+"/images", imgurAlbumObject, headers)
  122. links := make(map[string]string)
  123. for _, v := range imgurAlbumObject.Data {
  124. links[v.Link] = ""
  125. }
  126. if len(links) <= 0 {
  127. return getImgurSingleUrls(url)
  128. }
  129. log.Printf("Found imgur album with %d images (url: %s)\n", len(links), url)
  130. return links, nil
  131. }
  132. //#endregion
  133. //#region Streamable
  // streamableObject is the decode target for a Streamable video API response,
  // as requested by getStreamableUrls.
  type streamableObject struct {
  	// Status is the video state; getStreamableUrls only accepts the value 2.
  	Status int    `json:"status"`
  	Title  string `json:"title"`
  	// Files lists the available renditions. getStreamableUrls downloads Mp4.
  	Files struct {
  		Mp4 struct {
  			URL    string `json:"url"`
  			Width  int    `json:"width"`
  			Height int    `json:"height"`
  		} `json:"mp4"`
  		Mp4Mobile struct {
  			URL    string `json:"url"`
  			Width  int    `json:"width"`
  			Height int    `json:"height"`
  		} `json:"mp4-mobile"`
  	} `json:"files"`
  	URL          string `json:"url"`
  	ThumbnailURL string `json:"thumbnail_url"`
  	// Message is left untyped; nothing in this file reads it.
  	Message interface{} `json:"message"`
  }
  153. func getStreamableUrls(url string) (map[string]string, error) {
  154. matches := regexUrlStreamable.FindStringSubmatch(url)
  155. shortcode := matches[3]
  156. if shortcode == "" {
  157. return nil, errors.New("unable to get shortcode from URL")
  158. }
  159. reqUrl := fmt.Sprintf("https://api.streamable.com/videos/%s", shortcode)
  160. streamable := new(streamableObject)
  161. getJSON(reqUrl, streamable)
  162. if streamable.Status != 2 || streamable.Files.Mp4.URL == "" {
  163. return nil, errors.New("streamable object has no download candidate")
  164. }
  165. link := streamable.Files.Mp4.URL
  166. if !strings.HasPrefix(link, "http") {
  167. link = "https:" + link
  168. }
  169. links := make(map[string]string)
  170. links[link] = ""
  171. return links, nil
  172. }
  173. //#endregion
  174. //#region Gfycat
  // gfycatObject is the decode target for the Gfycat item API response requested
  // by getGfycatUrls. Only the MP4 link is kept.
  type gfycatObject struct {
  	GfyItem struct {
  		// Mp4URL is the link getGfycatUrls returns for download.
  		Mp4URL string `json:"mp4Url"`
  	} `json:"gfyItem"`
  }
  180. func getGfycatUrls(url string) (map[string]string, error) {
  181. parts := strings.Split(url, "/")
  182. if len(parts) < 3 {
  183. return nil, errors.New("unable to parse Gfycat URL")
  184. }
  185. gfycatId := parts[len(parts)-1]
  186. gfycatObject := new(gfycatObject)
  187. getJSON("https://api.gfycat.com/v1/gfycats/"+gfycatId, gfycatObject)
  188. gfycatUrl := gfycatObject.GfyItem.Mp4URL
  189. if url == "" {
  190. return nil, errors.New("failed to read response from Gfycat")
  191. }
  192. return map[string]string{gfycatUrl: ""}, nil
  193. }
  194. //#endregion
  195. //#region Flickr
  // flickrPhotoSizeObject is one entry of the size list in a Flickr
  // "flickr.photos.getSizes" response.
  type flickrPhotoSizeObject struct {
  	// Label names the size; getFlickrUrlFromPhotoId treats an empty label as
  	// "no size chosen yet".
  	Label  string `json:"label"`
  	Width  int    `json:"width"`
  	Height int    `json:"height"`
  	// Source is the value getFlickrUrlFromPhotoId returns as the download URL.
  	Source string `json:"source"`
  	URL    string `json:"url"`
  	Media  string `json:"media"`
  }
  204. type flickrPhotoObject struct {
  205. Sizes struct {
  206. Canblog int `json:"canblog"`
  207. Canprint int `json:"canprint"`
  208. Candownload int `json:"candownload"`
  209. Size []flickrPhotoSizeObject `json:"size"`
  210. } `json:"sizes"`
  211. Stat string `json:"stat"`
  212. }
  213. func getFlickrUrlFromPhotoId(photoId string) string {
  214. reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photo_id=%s",
  215. "flickr.photos.getSizes", config.Credentials.FlickrApiKey, photoId)
  216. flickrPhoto := new(flickrPhotoObject)
  217. getJSON(reqUrl, flickrPhoto)
  218. var bestSize flickrPhotoSizeObject
  219. for _, size := range flickrPhoto.Sizes.Size {
  220. if bestSize.Label == "" {
  221. bestSize = size
  222. } else {
  223. if size.Width > bestSize.Width || size.Height > bestSize.Height {
  224. bestSize = size
  225. }
  226. }
  227. }
  228. return bestSize.Source
  229. }
  230. func getFlickrPhotoUrls(url string) (map[string]string, error) {
  231. if config.Credentials.FlickrApiKey == "" {
  232. return nil, errors.New("invalid Flickr API Key Set")
  233. }
  234. matches := regexUrlFlickrPhoto.FindStringSubmatch(url)
  235. photoId := matches[5]
  236. if photoId == "" {
  237. return nil, errors.New("unable to get Photo ID from URL")
  238. }
  239. return map[string]string{getFlickrUrlFromPhotoId(photoId): ""}, nil
  240. }
  // flickrAlbumObject is the decode target for the Flickr
  // "flickr.photosets.getPhotos" response requested by getFlickrAlbumUrls.
  type flickrAlbumObject struct {
  	Photoset struct {
  		ID        string `json:"id"`
  		Primary   string `json:"primary"`
  		Owner     string `json:"owner"`
  		Ownername string `json:"ownername"`
  		// Photo lists the album entries; getFlickrAlbumUrls only reads ID.
  		Photo []struct {
  			ID        string `json:"id"`
  			Secret    string `json:"secret"`
  			Server    string `json:"server"`
  			Farm      int    `json:"farm"`
  			Title     string `json:"title"`
  			Isprimary string `json:"isprimary"`
  			Ispublic  int    `json:"ispublic"`
  			Isfriend  int    `json:"isfriend"`
  			Isfamily  int    `json:"isfamily"`
  		} `json:"photo"`
  		Page    int    `json:"page"`
  		PerPage int    `json:"per_page"`
  		Perpage int    `json:"perpage"`
  		Pages   int    `json:"pages"`
  		Total   string `json:"total"`
  		Title   string `json:"title"`
  	} `json:"photoset"`
  	Stat string `json:"stat"`
  }
  267. func getFlickrAlbumUrls(url string) (map[string]string, error) {
  268. if config.Credentials.FlickrApiKey == "" {
  269. return nil, errors.New("invalid Flickr API Key Set")
  270. }
  271. matches := regexUrlFlickrAlbum.FindStringSubmatch(url)
  272. if len(matches) < 10 || matches[9] == "" {
  273. return nil, errors.New("unable to find Flickr Album ID in URL")
  274. }
  275. albumId := matches[9]
  276. if albumId == "" {
  277. return nil, errors.New("unable to get Album ID from URL")
  278. }
  279. reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photoset_id=%s&per_page=500",
  280. "flickr.photosets.getPhotos", config.Credentials.FlickrApiKey, albumId)
  281. flickrAlbum := new(flickrAlbumObject)
  282. getJSON(reqUrl, flickrAlbum)
  283. links := make(map[string]string)
  284. for _, photo := range flickrAlbum.Photoset.Photo {
  285. links[getFlickrUrlFromPhotoId(photo.ID)] = ""
  286. }
  287. return links, nil
  288. }
  289. func getFlickrAlbumShortUrls(url string) (map[string]string, error) {
  290. result, err := http.Get(url)
  291. if err != nil {
  292. return nil, errors.New("Error getting long URL from shortened Flickr Album URL: " + err.Error())
  293. }
  294. if regexUrlFlickrAlbum.MatchString(result.Request.URL.String()) {
  295. return getFlickrAlbumUrls(result.Request.URL.String())
  296. }
  297. return nil, errors.New("encountered invalid URL while trying to get long URL from short Flickr Album URL")
  298. }
  299. //#endregion
  300. //#region Tistory
  // getTistoryUrls returns the original-quality form of a Tistory CDN link.
  // http://t1.daumcdn.net/cfile/tistory/[…] => http://t1.daumcdn.net/cfile/tistory/[…]?original
  // http://t1.daumcdn.net/cfile/tistory/[…]?original => as is
  // The filename in the returned map is empty and the error is always nil.
  func getTistoryUrls(link string) (map[string]string, error) {
  	const originalSuffix = "?original"

  	if strings.HasSuffix(link, originalSuffix) {
  		return map[string]string{link: ""}, nil
  	}
  	return map[string]string{link + originalSuffix: ""}, nil
  }
  // getLegacyTistoryUrls rewrites a legacy Tistory link to its original-quality
  // form by replacing every "/image/" path segment with "/original/". The
  // filename in the returned map is empty and the error is always nil.
  func getLegacyTistoryUrls(link string) (map[string]string, error) {
  	original := strings.ReplaceAll(link, "/image/", "/original/")
  	links := map[string]string{original: ""}
  	return links, nil
  }
  314. func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
  315. parameters, _ := url.ParseQuery(urlI)
  316. if val, ok := parameters["fname"]; ok {
  317. if len(val) > 0 {
  318. if regexUrlTistoryLegacy.MatchString(val[0]) {
  319. return getLegacyTistoryUrls(val[0])
  320. }
  321. }
  322. }
  323. return nil, nil
  324. }
  325. func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
  326. client := new(http.Client)
  327. request, err := http.NewRequest("HEAD", url, nil)
  328. if err != nil {
  329. return nil, err
  330. }
  331. request.Header.Add("Accept-Encoding", "identity")
  332. request.Header.Add("User-Agent", sneakyUserAgent)
  333. respHead, err := client.Do(request)
  334. if err != nil {
  335. return nil, err
  336. }
  337. contentType := ""
  338. for headerKey, headerValue := range respHead.Header {
  339. if headerKey == "Content-Type" {
  340. contentType = headerValue[0]
  341. }
  342. }
  343. if !strings.Contains(contentType, "text/html") {
  344. return nil, nil
  345. }
  346. request, err = http.NewRequest("GET", url, nil)
  347. if err != nil {
  348. return nil, err
  349. }
  350. request.Header.Add("Accept-Encoding", "identity")
  351. request.Header.Add("User-Agent", sneakyUserAgent)
  352. resp, err := client.Do(request)
  353. if err != nil {
  354. return nil, err
  355. }
  356. doc, err := goquery.NewDocumentFromResponse(resp)
  357. if err != nil {
  358. return nil, err
  359. }
  360. var links = make(map[string]string)
  361. doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) {
  362. foundUrl, exists := s.Attr("src")
  363. if exists {
  364. if regexUrlTistoryLegacyWithCDN.MatchString(foundUrl) {
  365. finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl)
  366. if len(finalTistoryUrls) > 0 {
  367. for finalTistoryUrl := range finalTistoryUrls {
  368. foundFilename := s.AttrOr("filename", "")
  369. links[finalTistoryUrl] = foundFilename
  370. }
  371. }
  372. } else if regexUrlTistoryLegacy.MatchString(foundUrl) {
  373. finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl)
  374. if len(finalTistoryUrls) > 0 {
  375. for finalTistoryUrl := range finalTistoryUrls {
  376. foundFilename := s.AttrOr("filename", "")
  377. links[finalTistoryUrl] = foundFilename
  378. }
  379. }
  380. }
  381. }
  382. })
  383. if len(links) > 0 {
  384. log.Printf("[%s] Found tistory album with %d images (url: %s)\n", time.Now().Format(time.Stamp), len(links), url)
  385. }
  386. return links, nil
  387. }
  388. //#endregion
  389. //#region Reddit
  // redditThreadObject is the decode target for the JSON form of a Reddit post
  // (the post URL with ".json" appended), which is an array of listings.
  // This is very crude but works for now: Children is left untyped and is
  // unpacked with type assertions in getRedditPostUrls.
  type redditThreadObject []struct {
  	Kind string `json:"kind"`
  	Data struct {
  		Children interface{} `json:"children"`
  	} `json:"data"`
  }
  397. func getRedditPostUrls(link string) (map[string]string, error) {
  398. if strings.Contains(link, "?") {
  399. link = link[:strings.Index(link, "?")]
  400. }
  401. redditThread := new(redditThreadObject)
  402. headers := make(map[string]string)
  403. headers["Accept-Encoding"] = "identity"
  404. headers["User-Agent"] = sneakyUserAgent
  405. err := getJSONwithHeaders(link+".json", redditThread, headers)
  406. if err != nil {
  407. return nil, fmt.Errorf("failed to parse json from reddit post:\t%s", err)
  408. }
  409. redditPost := (*redditThread)[0].Data.Children.([]interface{})[0].(map[string]interface{})
  410. redditPostData := redditPost["data"].(map[string]interface{})
  411. if redditPostData["url_overridden_by_dest"] != nil {
  412. redditLink := redditPostData["url_overridden_by_dest"].(string)
  413. filename := fmt.Sprintf("Reddit-%s_%s %s", redditPostData["subreddit"].(string), redditPostData["id"].(string), filenameFromURL(redditLink))
  414. return map[string]string{redditLink: filename}, nil
  415. }
  416. return nil, nil
  417. }
  418. //#endregion