package main

import (
	"errors"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/ChimeraCoder/anaconda"
	"github.com/Jeffail/gabs"
	"github.com/PuerkitoBio/goquery"
	"github.com/bwmarrin/discordgo"
	"github.com/fatih/color"
	"golang.org/x/net/html"
)

const (
	imgurClientID   = "08af502a9e70d65"
	sneakyUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
)

var (
	twitterClient *anaconda.TwitterApi
)

//#region Twitter
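// getTwitterUrls maps a direct twimg media URL to its ":orig" (original resolution) variant,
// keyed by download URL with the derived filename as the value.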
func getTwitterUrls(inputURL string) (map[string]string, error) {
	parts := strings.Split(inputURL, ":")
	if len(parts) < 2 {
		return nil, errors.New("unable to parse Twitter URL")
	}
	return map[string]string{"https:" + parts[1] + ":orig": filenameFromURL(parts[1])}, nil
}

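// getTwitterStatusUrls looks up a tweet by status ID via the Twitter API and returns download
// links for its media: the highest-bitrate video variant when present, otherwise the image and
// expanded URLs resolved through getDownloadLinks.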
func getTwitterStatusUrls(inputURL string, m *discordgo.Message) (map[string]string, error) {
	if twitterClient == nil {
		return nil, errors.New("twitter API keys not set")
	}

	if strings.Contains(inputURL, "/photo/") {
		inputURL = inputURL[:strings.Index(inputURL, "/photo/")]
	}
	if strings.Contains(inputURL, "/video/") {
		inputURL = inputURL[:strings.Index(inputURL, "/video/")]
	}

	matches := regexUrlTwitterStatus.FindStringSubmatch(inputURL)
	if len(matches) < 5 {
		return nil, errors.New("unable to find status ID in Twitter URL")
	}
	statusId, err := strconv.ParseInt(matches[4], 10, 64)
	if err != nil {
		return nil, err
	}

	tweet, err := twitterClient.GetTweet(statusId, nil)
	if err != nil {
		return nil, err
	}

	links := make(map[string]string)
	for _, tweetMedia := range tweet.ExtendedEntities.Media {
		if len(tweetMedia.VideoInfo.Variants) > 0 {
			// Pick the highest-bitrate video variant.
			var lastVideoVariant anaconda.Variant
			for _, videoVariant := range tweetMedia.VideoInfo.Variants {
				if videoVariant.Bitrate >= lastVideoVariant.Bitrate {
					lastVideoVariant = videoVariant
				}
			}
			if lastVideoVariant.Url != "" {
				links[lastVideoVariant.Url] = ""
			}
		} else {
			foundUrls := getDownloadLinks(tweetMedia.Media_url_https, m)
			for foundUrlKey, foundUrlValue := range foundUrls {
				links[foundUrlKey] = foundUrlValue
			}
		}
	}
	for _, tweetUrl := range tweet.Entities.Urls {
		foundUrls := getDownloadLinks(tweetUrl.Expanded_url, m)
		for foundUrlKey, foundUrlValue := range foundUrls {
			links[foundUrlKey] = foundUrlValue
		}
	}

	return links, nil
}

//#endregion

//#region Instagram
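// getInstagramUrls returns download links for an Instagram post URL, handling single videos,
// multi-image albums, and single pictures (via the /media/?size=l endpoint) in that order.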
func getInstagramUrls(url string) (map[string]string, error) {
	username, shortcode := getInstagramInfo(url)
	filename := fmt.Sprintf("instagram %s - %s", username, shortcode)

	// if instagram video
	videoUrl := getInstagramVideoUrl(url)
	if videoUrl != "" {
		return map[string]string{videoUrl: filename + filepathExtension(videoUrl)}, nil
	}

	// if instagram album
	albumUrls := getInstagramAlbumUrls(url)
	if len(albumUrls) > 0 {
		links := make(map[string]string)
		for i, albumUrl := range albumUrls {
			links[albumUrl] = filename + " " + strconv.Itoa(i+1) + filepathExtension(albumUrl)
		}
		return links, nil
	}

	// if instagram picture
	afterLastSlash := strings.LastIndex(url, "/")
	mediaUrl := url[:afterLastSlash]
	mediaUrl += strings.Replace(strings.Replace(url[afterLastSlash:], "?", "&", -1), "/", "/media/?size=l", -1)
	return map[string]string{mediaUrl: filename + ".jpg"}, nil
}

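// getInstagramInfo scrapes the post page for the embedded window._sharedData JSON and extracts
// the owner's username and the post shortcode, returning "unknown" for both on failure.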
func getInstagramInfo(url string) (string, string) {
	resp, err := http.Get(url)
	if err != nil {
		return "unknown", "unknown"
	}
	defer resp.Body.Close()

	z := html.NewTokenizer(resp.Body)
ParseLoop:
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break ParseLoop
		}
		if tt == html.StartTagToken || tt == html.SelfClosingTagToken {
			t := z.Token()
			for _, a := range t.Attr {
				if a.Key == "type" && a.Val == "text/javascript" {
					z.Next()
					content := string(z.Text())
					if strings.Contains(content, "window._sharedData = ") {
						content = strings.Replace(content, "window._sharedData = ", "", 1)
						content = content[:len(content)-1] // strip trailing semicolon
						jsonParsed, err := gabs.ParseJSON([]byte(content))
						if err != nil {
							log.Println(lg("API", "Instagram", color.HiRedString, "error parsing instagram json:\t"+err.Error()))
							continue ParseLoop
						}
						entryChildren, err := jsonParsed.Path("entry_data.PostPage").Children()
						if err != nil {
							log.Println(lg("API", "Instagram", color.HiRedString, "unable to find entries children:\t"+err.Error()))
							continue ParseLoop
						}
						for _, entryChild := range entryChildren {
							shortcode := entryChild.Path("graphql.shortcode_media.shortcode").Data().(string)
							username := entryChild.Path("graphql.shortcode_media.owner.username").Data().(string)
							return username, shortcode
						}
					}
				}
			}
		}
	}
	return "unknown", "unknown"
}

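// getInstagramVideoUrl scrapes the post page for an og:video (or og:video:secure_url) meta tag
// and returns its content URL, or an empty string if the post has no video.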
func getInstagramVideoUrl(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	z := html.NewTokenizer(resp.Body)
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			return ""
		}
		if tt == html.StartTagToken || tt == html.SelfClosingTagToken {
			t := z.Token()
			if t.Data == "meta" {
				for _, a := range t.Attr {
					if a.Key == "property" && (a.Val == "og:video" || a.Val == "og:video:secure_url") {
						for _, at := range t.Attr {
							if at.Key == "content" {
								return at.Val
							}
						}
					}
				}
			}
		}
	}
}

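// getInstagramAlbumUrls scrapes the post page's window._sharedData JSON for sidecar (album)
// entries and returns the display URL of each image; the slice is empty for non-album posts.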
func getInstagramAlbumUrls(url string) []string {
	var links []string
	resp, err := http.Get(url)
	if err != nil {
		return links
	}
	defer resp.Body.Close()

	z := html.NewTokenizer(resp.Body)
ParseLoop:
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break ParseLoop
		}
		if tt == html.StartTagToken || tt == html.SelfClosingTagToken {
			t := z.Token()
			for _, a := range t.Attr {
				if a.Key == "type" && a.Val == "text/javascript" {
					z.Next()
					content := string(z.Text())
					if strings.Contains(content, "window._sharedData = ") {
						content = strings.Replace(content, "window._sharedData = ", "", 1)
						content = content[:len(content)-1] // strip trailing semicolon
						jsonParsed, err := gabs.ParseJSON([]byte(content))
						if err != nil {
							log.Println(lg("API", "Instagram", color.HiRedString, "error parsing instagram json:\t%s", err))
							continue ParseLoop
						}
						entryChildren, err := jsonParsed.Path("entry_data.PostPage").Children()
						if err != nil {
							log.Println(lg("API", "Instagram", color.HiRedString, "unable to find entries children:\t%s", err))
							continue ParseLoop
						}
						for _, entryChild := range entryChildren {
							albumChildren, err := entryChild.Path("graphql.shortcode_media.edge_sidecar_to_children.edges").Children()
							if err != nil {
								continue ParseLoop
							}
							for _, albumChild := range albumChildren {
								link, ok := albumChild.Path("node.display_url").Data().(string)
								if ok {
									links = append(links, link)
								}
							}
						}
					}
				}
			}
		}
	}

	if len(links) > 0 {
		log.Printf("Found instagram album with %d images (url: %s)\n", len(links), url)
	}
	return links
}

//#endregion

//#region Imgur
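// getImgurSingleUrls rewrites a single-image Imgur URL to its imgur.com/download/ form,
// stripping any subreddit path component and .gifv extension.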
func getImgurSingleUrls(url string) (map[string]string, error) {
	url = regexp.MustCompile(`(r\/[^\/]+\/)`).ReplaceAllString(url, "") // remove subreddit url
	url = strings.Replace(url, "imgur.com/", "imgur.com/download/", -1)
	url = strings.Replace(url, ".gifv", "", -1)
	return map[string]string{url: ""}, nil
}

type imgurAlbumObject struct {
	Data []struct {
		Link string
	}
}

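// getImgurAlbumUrls fetches an Imgur album's image list from the Imgur API and returns the
// direct link for each image, falling back to getImgurSingleUrls when no album images are found.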
func getImgurAlbumUrls(url string) (map[string]string, error) {
	url = regexp.MustCompile(`(#[A-Za-z0-9]+)?$`).ReplaceAllString(url, "") // remove anchor
	afterLastSlash := strings.LastIndex(url, "/")
	albumId := url[afterLastSlash+1:]
	headers := make(map[string]string)
	headers["Authorization"] = "Client-ID " + imgurClientID
	imgurAlbumObject := new(imgurAlbumObject)
	getJSONwithHeaders("https://api.imgur.com/3/album/"+albumId+"/images", imgurAlbumObject, headers)
	links := make(map[string]string)
	for _, v := range imgurAlbumObject.Data {
		links[v.Link] = ""
	}
	if len(links) == 0 {
		return getImgurSingleUrls(url)
	}
	log.Printf("Found imgur album with %d images (url: %s)\n", len(links), url)
	return links, nil
}

//#endregion

//#region Streamable

type streamableObject struct {
	Status int    `json:"status"`
	Title  string `json:"title"`
	Files  struct {
		Mp4 struct {
			URL    string `json:"url"`
			Width  int    `json:"width"`
			Height int    `json:"height"`
		} `json:"mp4"`
		Mp4Mobile struct {
			URL    string `json:"url"`
			Width  int    `json:"width"`
			Height int    `json:"height"`
		} `json:"mp4-mobile"`
	} `json:"files"`
	URL          string      `json:"url"`
	ThumbnailURL string      `json:"thumbnail_url"`
	Message      interface{} `json:"message"`
}

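// getStreamableUrls resolves a Streamable shortcode through the Streamable API and returns the
// mp4 download URL once the video is ready (API status 2 with an mp4 URL present).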
func getStreamableUrls(url string) (map[string]string, error) {
	matches := regexUrlStreamable.FindStringSubmatch(url)
	if len(matches) < 4 || matches[3] == "" {
		return nil, errors.New("unable to get shortcode from URL")
	}
	shortcode := matches[3]

	reqUrl := fmt.Sprintf("https://api.streamable.com/videos/%s", shortcode)
	streamable := new(streamableObject)
	getJSON(reqUrl, streamable)
	if streamable.Status != 2 || streamable.Files.Mp4.URL == "" {
		return nil, errors.New("streamable object has no download candidate")
	}

	link := streamable.Files.Mp4.URL
	if !strings.HasPrefix(link, "http") {
		link = "https:" + link
	}
	links := make(map[string]string)
	links[link] = ""
	return links, nil
}

//#endregion

//#region Gfycat

type gfycatObject struct {
	GfyItem struct {
		Mp4URL string `json:"mp4Url"`
	} `json:"gfyItem"`
}

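// getGfycatUrls looks up a Gfycat ID through the Gfycat API and returns the mp4 URL of the clip.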
func getGfycatUrls(url string) (map[string]string, error) {
	parts := strings.Split(url, "/")
	if len(parts) < 3 {
		return nil, errors.New("unable to parse Gfycat URL")
	}
	gfycatId := parts[len(parts)-1]
	gfycatObject := new(gfycatObject)
	getJSON("https://api.gfycat.com/v1/gfycats/"+gfycatId, gfycatObject)
	gfycatUrl := gfycatObject.GfyItem.Mp4URL
	if gfycatUrl == "" {
		return nil, errors.New("failed to read response from Gfycat")
	}
	return map[string]string{gfycatUrl: ""}, nil
}

//#endregion

//#region Flickr

type flickrPhotoSizeObject struct {
	Label  string `json:"label"`
	Width  int    `json:"width"`
	Height int    `json:"height"`
	Source string `json:"source"`
	URL    string `json:"url"`
	Media  string `json:"media"`
}

type flickrPhotoObject struct {
	Sizes struct {
		Canblog     int                     `json:"canblog"`
		Canprint    int                     `json:"canprint"`
		Candownload int                     `json:"candownload"`
		Size        []flickrPhotoSizeObject `json:"size"`
	} `json:"sizes"`
	Stat string `json:"stat"`
}

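// getFlickrUrlFromPhotoId queries flickr.photos.getSizes for a photo ID and returns the source
// URL of the largest available size.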
func getFlickrUrlFromPhotoId(photoId string) string {
	reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photo_id=%s",
		"flickr.photos.getSizes", config.Credentials.FlickrApiKey, photoId)
	flickrPhoto := new(flickrPhotoObject)
	getJSON(reqUrl, flickrPhoto)
	var bestSize flickrPhotoSizeObject
	for _, size := range flickrPhoto.Sizes.Size {
		if bestSize.Label == "" || size.Width > bestSize.Width || size.Height > bestSize.Height {
			bestSize = size
		}
	}
	return bestSize.Source
}

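// getFlickrPhotoUrls extracts the photo ID from a Flickr photo URL and returns the download
// link for its largest available size. Requires a Flickr API key in the config.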
func getFlickrPhotoUrls(url string) (map[string]string, error) {
	if config.Credentials.FlickrApiKey == "" {
		return nil, errors.New("Flickr API key not set")
	}
	matches := regexUrlFlickrPhoto.FindStringSubmatch(url)
	if len(matches) < 6 || matches[5] == "" {
		return nil, errors.New("unable to get photo ID from URL")
	}
	photoId := matches[5]
	return map[string]string{getFlickrUrlFromPhotoId(photoId): ""}, nil
}

type flickrAlbumObject struct {
	Photoset struct {
		ID        string `json:"id"`
		Primary   string `json:"primary"`
		Owner     string `json:"owner"`
		Ownername string `json:"ownername"`
		Photo     []struct {
			ID        string `json:"id"`
			Secret    string `json:"secret"`
			Server    string `json:"server"`
			Farm      int    `json:"farm"`
			Title     string `json:"title"`
			Isprimary string `json:"isprimary"`
			Ispublic  int    `json:"ispublic"`
			Isfriend  int    `json:"isfriend"`
			Isfamily  int    `json:"isfamily"`
		} `json:"photo"`
		Page    int    `json:"page"`
		PerPage int    `json:"per_page"`
		Perpage int    `json:"perpage"`
		Pages   int    `json:"pages"`
		Total   string `json:"total"`
		Title   string `json:"title"`
	} `json:"photoset"`
	Stat string `json:"stat"`
}

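// getFlickrAlbumUrls extracts the album (photoset) ID from a Flickr album URL, lists its photos
// via flickr.photosets.getPhotos, and returns the largest-size URL for each photo.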
func getFlickrAlbumUrls(url string) (map[string]string, error) {
	if config.Credentials.FlickrApiKey == "" {
		return nil, errors.New("Flickr API key not set")
	}
	matches := regexUrlFlickrAlbum.FindStringSubmatch(url)
	if len(matches) < 10 || matches[9] == "" {
		return nil, errors.New("unable to find Flickr album ID in URL")
	}
	albumId := matches[9]
	reqUrl := fmt.Sprintf("https://www.flickr.com/services/rest/?format=json&nojsoncallback=1&method=%s&api_key=%s&photoset_id=%s&per_page=500",
		"flickr.photosets.getPhotos", config.Credentials.FlickrApiKey, albumId)
	flickrAlbum := new(flickrAlbumObject)
	getJSON(reqUrl, flickrAlbum)
	links := make(map[string]string)
	for _, photo := range flickrAlbum.Photoset.Photo {
		links[getFlickrUrlFromPhotoId(photo.ID)] = ""
	}
	return links, nil
}

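// getFlickrAlbumShortUrls follows a shortened Flickr album URL to its long form and delegates
// to getFlickrAlbumUrls when the resolved URL is a valid album URL.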
func getFlickrAlbumShortUrls(url string) (map[string]string, error) {
	result, err := http.Get(url)
	if err != nil {
		return nil, errors.New("error getting long URL from shortened Flickr album URL: " + err.Error())
	}
	defer result.Body.Close()
	if regexUrlFlickrAlbum.MatchString(result.Request.URL.String()) {
		return getFlickrAlbumUrls(result.Request.URL.String())
	}
	return nil, errors.New("encountered invalid URL while trying to get long URL from short Flickr album URL")
}

//#endregion

//#region Tistory

// getTistoryUrls downloads tistory URLs
// http://t1.daumcdn.net/cfile/tistory/[…] => http://t1.daumcdn.net/cfile/tistory/[…]?original
// http://t1.daumcdn.net/cfile/tistory/[…]?original => as is
func getTistoryUrls(link string) (map[string]string, error) {
	if !strings.HasSuffix(link, "?original") {
		link += "?original"
	}
	return map[string]string{link: ""}, nil
}

func getLegacyTistoryUrls(link string) (map[string]string, error) {
	link = strings.Replace(link, "/image/", "/original/", -1)
	return map[string]string{link: ""}, nil
}

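// getTistoryWithCDNUrls unwraps a Daum/Kakao CDN thumbnail URL: if its fname query parameter
// holds a legacy Tistory file URL, that URL is resolved through getLegacyTistoryUrls.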
func getTistoryWithCDNUrls(urlI string) (map[string]string, error) {
	// Parse the full URL so the fname query parameter can be read reliably.
	parsed, err := url.Parse(urlI)
	if err != nil {
		return nil, err
	}
	if val, ok := parsed.Query()["fname"]; ok && len(val) > 0 {
		if regexUrlTistoryLegacy.MatchString(val[0]) {
			return getLegacyTistoryUrls(val[0])
		}
	}
	return nil, nil
}

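// getPossibleTistorySiteUrls fetches a page that may be a Tistory blog post (first checking via
// a HEAD request that it serves HTML) and collects Tistory image URLs from known content containers.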
func getPossibleTistorySiteUrls(url string) (map[string]string, error) {
	client := new(http.Client)
	request, err := http.NewRequest("HEAD", url, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Add("Accept-Encoding", "identity")
	request.Header.Add("User-Agent", sneakyUserAgent)
	respHead, err := client.Do(request)
	if err != nil {
		return nil, err
	}
	defer respHead.Body.Close()

	contentType := respHead.Header.Get("Content-Type")
	if !strings.Contains(contentType, "text/html") {
		return nil, nil
	}

	request, err = http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Add("Accept-Encoding", "identity")
	request.Header.Add("User-Agent", sneakyUserAgent)
	resp, err := client.Do(request)
	if err != nil {
		return nil, err
	}
	doc, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		return nil, err
	}

	var links = make(map[string]string)
	doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) {
		foundUrl, exists := s.Attr("src")
		if !exists {
			return
		}
		if regexUrlTistoryLegacyWithCDN.MatchString(foundUrl) {
			finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl)
			for finalTistoryUrl := range finalTistoryUrls {
				links[finalTistoryUrl] = s.AttrOr("filename", "")
			}
		} else if regexUrlTistoryLegacy.MatchString(foundUrl) {
			finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl)
			for finalTistoryUrl := range finalTistoryUrls {
				links[finalTistoryUrl] = s.AttrOr("filename", "")
			}
		}
	})

	if len(links) > 0 {
		log.Printf("[%s] Found tistory album with %d images (url: %s)\n", time.Now().Format(time.Stamp), len(links), url)
	}
	return links, nil
}

//#endregion

//#region Reddit

// This is very crude but works for now
type redditThreadObject []struct {
	Kind string `json:"kind"`
	Data struct {
		Children interface{} `json:"children"`
	} `json:"data"`
}

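// getRedditPostUrls fetches the post's .json representation and, when the post links out to
// external media (url_overridden_by_dest), returns that link with a "Reddit-<subreddit>_<id>"
// filename prefix.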
func getRedditPostUrls(link string) (map[string]string, error) {
	redditThread := new(redditThreadObject)
	headers := make(map[string]string)
	headers["Accept-Encoding"] = "identity"
	headers["User-Agent"] = sneakyUserAgent
	err := getJSONwithHeaders(link+".json", redditThread, headers)
	if err != nil {
		return nil, fmt.Errorf("failed to parse json from reddit post:\t%s", err)
	}
	if len(*redditThread) == 0 {
		return nil, errors.New("no data in reddit thread response")
	}

	redditPost := (*redditThread)[0].Data.Children.([]interface{})[0].(map[string]interface{})
	redditPostData := redditPost["data"].(map[string]interface{})
	if redditPostData["url_overridden_by_dest"] != nil {
		redditLink := redditPostData["url_overridden_by_dest"].(string)
		filename := fmt.Sprintf("Reddit-%s_%s %s", redditPostData["subreddit"].(string), redditPostData["id"].(string), filenameFromURL(redditLink))
		return map[string]string{redditLink: filename}, nil
	}
	return nil, nil
}

//#endregion