From ac277386716b1e2a9f428aa829cbd97ee35e4915 Mon Sep 17 00:00:00 2001 From: KaKi87 Date: Wed, 27 Mar 2019 09:55:32 +0100 Subject: [PATCH] Upgrade to 1.0.1 Cf. changelog in README.md --- .gitignore | 3 + README.md | 214 +++++++++++++++++++++++++++++++++++++++++++++------ index.js | 129 ++++++++++++++++++++++++++++--- package.json | 2 +- 4 files changed, 312 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index 7d7830d..acc96ab 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,6 @@ typings/ # JetBrains .idea + +# Tests folder +tests diff --git a/README.md b/README.md index dfcbef1..c7c25ce 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Insta.getProfile('tiana_kaki', profile => { { "id": "6965336095", "name": "Tiana", - "pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg", + "pic": "https://scontent-mrs1-1.cdninstagram.com/vp/343360e2c822f78d6d302a1847db1833/5D2F4763/t51.2885-19/s320x320/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com", "bio": "", "private": false, "verified": false, @@ -27,54 +27,222 @@ Insta.getProfile('tiana_kaki', profile => { "lastPosts": [ "Buth7q-nTnW", "BtwBWbmnXtx", - "BtHJSmMhiDY" - ] + "BtHJSmMhiDY", + "BtHI5W9hnIC", + "Bswe8NNH2zM", + "BsTROEhH_vF", + "BrNOGceHFVP", + "BrNNjEpnPdb", + "BrNM6qyHf-3", + "BpkbkgpnjEw", + "BnhdjA1HxuL", + "BmIzQtoHGmf" + ], + "link": "https://instagram.com/tiana_kaki" +} +``` + +### Get hashtag data + +```js +Insta.getHashtag('limousin', hashtag => { + console.log(hashtag); +}); +``` + +```json +{ + "id": "17843780971049111", + "pic": "https://scontent-mrs1-1.cdninstagram.com/vp/58f240d6db1e0e45c3a7f2bc27c00ea2/5D36D2D6/t51.2885-15/e35/s150x150/53584789_129121004880886_3869886162348160858_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com", + "posts": 182557, + "featuredPosts": [ + "Bvdj-5gF0GN", + "BvZHGsrHh7F", + "BvgO5A9AwiG", + "Bu4B1l8BxTb", + "Bu8NV9qlpRi", + "BvgOruwgzsv", + "BvZvpEnpaCE", + "BvUhOgzAuse", + "BvKTAT4l2JS" + ], 
+ "lastPosts": [ + "BvgSlCFBcTx", + "BvgSHvDpTyb", + "BvgRtV6Dmh3", + "BvgRIPiFPnP", + "BvgPKdlnw3a", + "BvgPEnCHCbg", + "BvgO5A9AwiG", + "BvgOruwgzsv", + "BvgNVk2DF4e", + "BvgKPISB0MY", + "BvgJKwahsC1", + "BvgISY5puZl", + "BvgHNxdgFKN", + "BvgGqA3J23x", + "BvgGmbSpq2g", + "BvgFFgEJKbK", + "BvgDznWHyUo", + "BvgCwLOnQ5X", + "BvgCEnVF1A6", + "BvgA4VCHItL", + "Bvf-4LLlf3H", + "BvfhY3SFQ0H", + "BvfWrfvgaiH", + "BvfPVIaj2Un", + "BvfKXQGpUhH", + "BvfH4a6nl-n", + "BvfHzRuhBzA", + "BvfGheRJwpZ", + "BvfFeB9A8an", + "BvfEyF8pHrY", + "BvfEDzDAm41", + "BvfDhyZHCZw", + "BvfDR7EFRgE", + "BvfBGwLlx3C", + "BvfApnTFaqv", + "BvfAQmPpjFI", + "Bve-j9ynlH4", + "Bve-XuBp4VS", + "Bve9DD4pLA1", + "Bve8yWAl8QX", + "Bve6y3egIYp", + "Bve16KThvN7", + "Bve5WlTAd61", + "Bve5VQqgVUX", + "Bve3_eJJuuv", + "Bve2RFEHPON", + "Bve1QpXlXSb", + "Bve1EfKHNTj", + "Bve0XaPHLWS", + "Bve0I9KnX-P", + "BvezvP7pSPL", + "BvezjDjlKPM", + "Bvezb0EHf_v", + "BvezYCsArGN", + "BvezTnbl-wK", + "BvezK7jJmb0", + "BveybI_Hr1m", + "BvexjnKlb8l", + "BvewSZlAQHI", + "BvevMpXgVYR", + "BvevJP2pxMh", + "BveuxmcAhq7", + "Bves_jzpeQ7", + "BvesnHPJ5UT", + "BvesY2PHs1c", + "Bver2RhAAGj", + "Bveq1e4FmPq", + "Bveq0WaBMP3", + "BveqsJJA2gL", + "BZ3O78ijPQP", + "BZwd7y6DwTe", + "BZt4NeLDooK" + ], + "link": "https://instagram.com/explore/tags/limousin/" } ``` ### Get post data ```js -Insta.getPost('Buth7q-nTnW', post => { +Insta.getPost('BrNM6qyHf-3', post => { console.log(post); }); ``` ```json { - "id": "1812187949221841859", - "timestamp": 1530249659, - "likes": 8, - "comments": 0, - "caption": "Vendredi dernier sur l'Ile du Souvenir du Parc de la Tête d'Or, à Lyon.", - "location": { - "name": "Parc de la Tête d'Or", - "city": "Lyon, France" - }, + "id": "1931256623437578167", + "timestamp": 1544443751, + "likes": 54, + "comments": [ + { + "user": "iamgeekcat", + "content": "toi aussi tu l'a achetée 👏", + "timestamp": 1545074290, + "hashtags": null, + "mentions": null, + "likes": 1 + }, + { + "user": "tiana_kaki", + "content": 
"@iamgeekcat Ouais ^^", + "timestamp": 1545079687, + "hashtags": null, + "mentions": [ + "@iamgeekcat" + ], + "likes": 0 + } + ], + "caption": "La montre connectée de #Xiaomi #MiBand3\nMerci @guillaume_slash pour #NightSight !", + "hashtags": [ + "#Xiaomi", + "#MiBand3", + "#NightSight" + ], + "mentions": [ + "@guillaume_slash" + ], + "tagged": [], + "location": null, "author": { "id": "6965336095", "username": "tiana_kaki", "name": "Tiana", - "pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg", - "verified": false + "pic": "https://scontent-mrs1-1.cdninstagram.com/vp/8123c0f64c3ef70d222a6a7a5379f87a/5D36A493/t51.2885-19/s150x150/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com", + "verified": false, + "link": "https://instagram.com/tiana_kaki" }, "contents": [ { - "url": "https://scontent-mrs1-1.cdninstagram.com/....jpg", - "type": "photo" - }, - { - "url": "https://scontent-mrs1-1.cdninstagram.com/....jpg", + "url": "https://scontent-mrs1-1.cdninstagram.com/vp/96aff5be0bba30fb39994c35624f3bfb/5D4A7A95/t51.2885-15/e35/45881951_215607719355847_3477452604092009384_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com", "type": "photo" } ] } ``` -`comments = -1` when disabled. +`comments` is `null` when disabled by author. + +### Subscribe to posts + +#### From user + +```js +Insta.subscribeUserPosts('tiana_kaki', posts => { + console.log(posts); +}, { + interval: 0, + lastPost: 'BrNM6qyHf-3' +}); +``` + +`options` (last parameter) are optional. 
+ +#### From hashtag + +```js +Insta.subscribeHashtagPosts('selfie', posts => { + console.log(posts); +}, { + interval: 0 +}); +``` ## Planned features - Support for authentication (private profiles, stories) -- Post comments list -- Events : new/deleted post, comment, like +- More events: deleted post, added/deleted comment and like + +## Changelog + +- `1.0.0` (2019-03-26) - Initial release +- `1.0.1` (2019-03-27) - Added improvements & features + - Fixed error scope: the `getProfile` callback now runs outside the parsing `try` block, so errors thrown by the callback are no longer rethrown as parsing errors + - Fixed wrongly structured output for single-photo posts + - Added support for comments + - Added support for hashtags, mentions and tags in posts and comments + - Added post subscriptions for users (untested) and hashtags diff --git a/index.js b/index.js index 2e07da0..6e65855 100644 --- a/index.js +++ b/index.js @@ -8,13 +8,15 @@ const module.exports = { getProfile(username, callback){ - request(`${insta}/${username}`,(err, res, body) => { + const link = `${insta}/${username}`; + request(link,(err, res, body) => { if(res.statusCode === 404) throw new Error('Instagram user not found'); body += ``; + let u; try { const user = JSON.parse(parse(body).body.textContent); - callback({ + u = { id: user['id'], name: user['full_name'], pic: user['profile_pic_url_hd'], @@ -26,36 +28,82 @@ module.exports = { following: user['edge_follow']['count'], posts: user['edge_owner_to_timeline_media']['count'], lastPosts: user['edge_owner_to_timeline_media']['edges'] - .map(post => post['node']['shortcode']) - }); + .map(post => post['node']['shortcode']), + link + }; } catch(e){ throw new Error('Instagram parsing error'); } + callback(u); + }); + }, + getHashtag(hashtag, callback){ + const link = `${insta}/explore/tags/${hashtag}/`; + request(link, (err, res, body) => { + if(res.statusCode === 404) + throw new Error('Instagram hashtag not found'); + body += ``; + let h; + try { + const hashtag = JSON.parse(parse(body).body.textContent); + h = { + id: hashtag['id'], + pic: hashtag['profile_pic_url'], + posts: 
hashtag['edge_hashtag_to_media']['count'], + featuredPosts: hashtag['edge_hashtag_to_top_posts']['edges'] + .map(post => post['node']['shortcode']), + lastPosts: hashtag['edge_hashtag_to_media']['edges'] + .map(post => post['node']['shortcode']), + link + } + } catch(e){ throw new Error('Instagram parsing error'); } + callback(h); }); }, getPost(shortcode, callback){ - request(`${insta}/p/${shortcode}`, (err, res, body) => { + const link = `${insta}/p/${shortcode}`; + request(link, (err, res, body) => { if(res.statusCode === 404) throw new Error('Instagram post not found'); body += ``; - let p = null; + let p; try { - const post = JSON.parse(parse(body).body.textContent); + const + post = JSON.parse(parse(body).body.textContent), + caption = post['edge_media_to_caption']['edges'].length > 0 + ? post['edge_media_to_caption']['edges'][0]['node']['text'] : null, + username = post['owner']['username'], + hashtagsRegex = /(?<=[\s>])#(\d*[A-Za-z_]+\d*)\b(?!;)/g, + usernamesRegex = /@([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\\.(?!\\.))){0,28}(?:[A-Za-z0-9_]))?)/g; p = { id: post['id'], timestamp: post['taken_at_timestamp'], likes: post['edge_media_preview_like']['count'], - comments: post['comments_disabled'] ? -1 : post['edge_media_to_comment']['count'], - caption: post['edge_media_to_caption']['edges'].length > 0 ? post['edge_media_to_caption']['edges'][0]['node']['text'] : null, + comments: post['comments_disabled'] ? null : post['edge_media_to_comment']['edges'] + .map(c => ({ + user: c['node']['owner']['username'], + content: c['node']['text'], + timestamp: c['node']['created_at'], + hashtags: c['node']['text'].match(hashtagsRegex), + mentions: c['node']['text'].match(usernamesRegex), + likes: c['node']['edge_liked_by']['count'] + })), + caption, + hashtags: caption ? caption.match(hashtagsRegex) : null, + mentions: caption ? 
caption.match(usernamesRegex) : null, + tagged: post['edge_media_to_tagged_user']['edges'] + .map(u => u['node']['user']['username']), location: post['location'] ? { name: post['location']['name'], city: JSON.parse(post['location']['address_json'])['city_name'] } : null, author: { id: post['owner']['id'], - username: post['owner']['username'], + username, name: post['owner']['full_name'], pic: post['owner']['profile_pic_url'], - verified: post['owner']['is_verified'] - } + verified: post['owner']['is_verified'], + link: `${insta}/${username}` + }, + link }; switch(post['__typename']){ case 'GraphImage': @@ -79,5 +127,62 @@ module.exports = { } catch(e){ throw new Error('Instagram parsing error'); } callback(p); }); + }, + subscribeUserPosts(username, onPosts, options){ + const interval = options['interval'] || 30; + let lastPost = options['lastPost'] || null; + const checkNewPosts = () => { + module.exports.getProfile(username, profile => { + const _lastPost = profile.lastPosts[0]; + if(_lastPost !== lastPost){ + lastPost = _lastPost; + if(!_lastPost){ + onPosts(lastPost); + setTimeout(checkNewPosts, interval); + } + else { + const posts = []; + for(let i = 0; i < profile.lastPosts.indexOf(lastPost); i++){ + posts.push(profile.lastPosts[i]); + } + onPosts(posts); + setTimeout(checkNewPosts, interval); + } + lastPost = _lastPost; + } + else { + setTimeout(checkNewPosts, interval); + } + }); + }; + checkNewPosts(); + }, + subscribeHashtagPosts(hashtag, onPosts, options){ + const interval = options['interval'] || 30; + let lastPost = options['lastPost'] || null; + const checkNewPosts = () => { + module.exports.getHashtag(hashtag, hashtag => { + const _lastPost = hashtag.lastPosts[0]; + if(_lastPost !== lastPost){ + if(!_lastPost){ + onPosts(lastPost); + setTimeout(checkNewPosts, interval); + } + else { + const posts = []; + for(let i = 0; i < hashtag.lastPosts.indexOf(lastPost); i++){ + posts.push(hashtag.lastPosts[i]); + } + onPosts(posts); + 
setTimeout(checkNewPosts, interval); + } + lastPost = _lastPost; + } + else { + setTimeout(checkNewPosts, interval); + } + }); + }; + checkNewPosts(); } }; diff --git a/package.json b/package.json index dff9056..82bf81d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@kaki87/ig-scraper", - "version": "1.0.0", + "version": "1.0.1", "description": "Instagrap scraper without authenticated API", "keywords": ["insta", "instagram", "ig", "scraper", "ig-scraper", "instagrap-scraper", "api", "scraper"], "homepage": "https://git.kaki87.net/KaKi87/ig-scraper",