10 Commits

Author SHA1 Message Date
055971a0e9 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:51:40 +00:00
99abb706a6 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:50:50 +00:00
42aae67117 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:50:29 +00:00
35d6cd35dd Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:50:10 +00:00
c05c23d327 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:49:33 +00:00
5615f35156 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:48:12 +00:00
c0d4b3e9b8 Giteafy the repo
Signed-off-by: Puranjay Savar Mattas <me@psmattas.com>
2025-09-03 09:47:34 +00:00
19614b9707 PlagiarismChecker 2021-05-13 15:22:22 +05:30
a98908f979 Update 2021-05-13 14:37:03 +05:30
bd8b9ddf58 ContentAggregator 2021-05-13 14:36:05 +05:30
6 changed files with 263 additions and 11 deletions

139
ContentAggregator.py Normal file
View File

@@ -0,0 +1,139 @@
"""
----------------------------------------
Content Aggregator
----------------------------------------
Surfing through different websites and articles
in search of good and authentic content is a
time-consuming process. This Python project can
help you save time looking for content. A content
aggregator searches popular websites for relevant
content, then compiles all of that content and
provides the user with unbiased content.
----------------------------------------
"""
import urllib, os, requests, datetime, subprocess
# reddit imports
import praw, pprint
# pip install feedparser
import feedparser
# stockexchange
from nsetools import Nse
# Reddit API client.
# Supply credentials through the REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET
# environment variables so real secrets never need to be committed to the
# repository. The original placeholder strings are kept as fallbacks, so
# editing them in place still works as before.
reddit = praw.Reddit(
    client_id=os.environ.get('REDDIT_CLIENT_ID', 'XXXXXXX'),
    client_secret=os.environ.get('REDDIT_CLIENT_SECRET', 'XXXXXXXXXXX'),
    grant_type_access='client_credentials',
    user_agent='script/1.0',
)
# class Reddit:
# def TopNews(self):
# Add your favorite NEWS subreddits in the argument as many as you'd like.
# for submission in reddit.subreddit('News+WorldNews+UpliftingNews+').top(limit=10):
# top_news = reddit.domain(submission).top('month')ls
# print(top_news)
"""
Each class contains functions which in turn call
APIs from the necessary packages; the rest is
self-explanatory, I suppose.
"""
class News:
    """Print headlines from the NDTV India news RSS feed."""

    def Indian_News(self):
        """Fetch the NDTV India feed and print up to 20 of its entries.

        For every entry the title, summary and link are written to stdout,
        followed by a '#' rule. A dashed rule closes the whole section.
        Returns None.
        """
        newsfeed = feedparser.parse(
            "http://feeds.feedburner.com/ndtvnews-india-news"
        )
        print("Today's News: ")
        # Slice rather than index 0..19: indexing raised IndexError whenever
        # the feed held fewer than 20 entries (including an empty feed).
        # NOTE(review): indentation was lost in the reviewed copy; the '#'
        # rule is assumed to be per entry and the dashed rule per section.
        for entry in newsfeed.entries[:20]:
            print(entry.title)
            print(entry.summary)
            print("------News Link--------")
            print(entry.link)
            print("###########################################")
            print(' ')
        print('-------------------------------------------------------------------------------------------------------')
        print(' ')
class Medium:
    """Print recent stories from Medium tag RSS feeds."""

    # https://github.com/thepracticaldev/dev.to/issues/28#issuecomment-325544385
    def _print_tag_feed(self, url, heading, limit):
        """Parse the feed at *url* and print up to *limit* entries.

        Prints *heading* first, then the title and link of each entry
        followed by a '#' rule, and finally a dashed rule for the section.
        """
        feed = feedparser.parse(url)
        print(heading)
        # Slice rather than index: indexing a fixed range raised IndexError
        # whenever the feed returned fewer entries than requested.
        # NOTE(review): indentation was lost in the reviewed copy; the '#'
        # rule is assumed to be per entry and the dashed rule per section.
        for entry in feed.entries[:limit]:
            print(entry.title)
            print("URL: ")
            print(entry.link)
            print("###########################################")
            print(' ')
        print('-------------------------------------------------------------------------------------------------------')
        print(' ')

    def medium_programming(self):
        """Print up to 10 stories from the 'programming' tag feed."""
        self._print_tag_feed(
            "https://medium.com/feed/tag/programming",
            "Programming Today: ",
            10,
        )

    def medium_python(self):
        """Print up to 10 stories from the 'python' tag feed."""
        self._print_tag_feed(
            "https://medium.com/feed/tag/python",
            "Python Today: ",
            10,
        )

    def medium_developer(self):
        """Print up to 5 stories from the 'developer' tag feed."""
        self._print_tag_feed(
            "https://medium.com/feed/tag/developer",
            "Developer News Today: ",
            5,
        )
class StockExchange:
    """Console report of top movers obtained through ``nsetools.Nse``."""

    def nse_stock(self):
        """Pretty-print the top gainers, then the top losers.

        Each list is preceded by its heading and followed by a '#' rule;
        a dashed rule closes the section. Returns None.
        """
        exchange = Nse()
        block_rule = "###########################################"
        reports = (
            ("TOP GAINERS OF YESTERDAY", exchange.get_top_gainers),
            ("TOP LOSERS OF YESTERDAY", exchange.get_top_losers),
        )
        # Each fetch happens only after its own heading has been printed.
        for title, fetch in reports:
            print(title)
            pprint.pprint(fetch())
            print(block_rule)
            print(' ')
        print('-------------------------------------------------------------------------------------------------------')
        print(' ')
# Object initialization: one module-level instance per content source.
# reddit_object = Reddit()
News_object = News()
Medium_object = Medium()
StockExchange_object = StockExchange()

if __name__ == "__main__":
    # Run every report in order; the Reddit report stays disabled.
    # reddit_object.TopNews()
    for report in (
        News_object.Indian_News,
        Medium_object.medium_python,
        Medium_object.medium_programming,
        Medium_object.medium_developer,
        StockExchange_object.nse_stock,
    ):
        report()

View File

@@ -0,0 +1,6 @@
# Advanced Projects:
| Serial No. | Program Name |
|------------|--------------|
|1 | [ContentAggregator.py](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/ContentAggregator.py) |
|2 | [PlagiarismChecker.py](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/PlagiarismChecker.py) |

View File

@@ -2,8 +2,8 @@
| Serial No. | Program Name | | Serial No. | Program Name |
|------------|--------------| |------------|--------------|
|1 | [MadLabGenerator](https://github.com/psavarmattas/Python-Projects/blob/master/MadLabGenerator.py) | |1 | [MadLabGenerator](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/MadLabGenerator.py) |
|2 | [NumberGuessing](https://github.com/psavarmattas/Python-Projects/blob/master/NumberGuessing.py) | |2 | [NumberGuessing](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/NumberGuessing.py) |
|3 | [RockPaperScisors](https://github.com/psavarmattas/Python-Projects/blob/master/RockPaperScissors.py) | |3 | [RockPaperScisors](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/RockPaperScissors.py) |
|4 | [WebsiteBlocker](https://github.com/psavarmattas/Python-Projects/blob/master/WebsiteBlocker.py) | |4 | [WebsiteBlocker](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/WebsiteBlocker.py) |
|5 | [BinarySearch](https://github.com/psavarmattas/Python-Projects/blob/master/BinarySearch.py) | |5 | [BinarySearch](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/BinarySearch.py) |

View File

@@ -2,8 +2,8 @@
| Serial No. | Program Name | | Serial No. | Program Name |
|------------|--------------| |------------|--------------|
|1 | [Calculator](https://github.com/psavarmattas/Python-Projects/blob/master/Calculator.py) | |1 | [Calculator](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/Calculator.py) |
|2 | [AlarmClock](https://github.com/psavarmattas/Python-Projects/blob/master/AlarmClock.py) & [youtube_alarm_videos](https://github.com/psavarmattas/Python-Projects/blob/master/youtube_alarm_videos.txt) | |2 | [AlarmClock](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/AlarmClock.py) & [youtube_alarm_videos](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/youtube_alarm_videos.txt) |
|3 | [TikTacToe](https://github.com/psavarmattas/Python-Projects/blob/master/TikTacToe.py) | |3 | [TikTacToe](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/TikTacToe.py) |
|4 | [DirectoryTreeGenerator](https://github.com/psavarmattas/Python-Projects/blob/master/DirectoryTreeGenerator.py) | |4 | [DirectoryTreeGenerator](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/DirectoryTreeGenerator.py) |
|5 | [CurrencyConverter](https://github.com/psavarmattas/Python-Projects/blob/master/CurrencyConverter.py) | |5 | [CurrencyConverter](https://git.psmattas.com/psmattas/Python-Projects/src/branch/All-Projects/CurrencyConverter.py) |

107
PlagiarismChecker.py Normal file
View File

@@ -0,0 +1,107 @@
"""
----------------------------------------
Plagiarism Checker
----------------------------------------
With content creation and blogging being one of
the good businesses in the market, everyone
wants to try their hand at it, but some lack
the funds to give their articles a plagiarism
check, as most plagiarism checkers do not come
for free. A Python plagiarism checker can be
built here using a natural language processing
library along with a search API to search the
first few pages of Google and detect plagiarism,
if any.
----------------------------------------
"""
import re
import nltk; nltk.download('punkt')
from nltk.util import ngrams, pad_sequence, everygrams
from nltk.tokenize import word_tokenize
from nltk.lm import MLE, WittenBellInterpolated
import numpy as np
import plotly.graph_objects as go
from scipy.ndimage import gaussian_filter
# Training data file (path to the reference text; set before running)
train_data_file = ""

# read training data
with open(train_data_file) as f:
    train_text = f.read().lower()

# apply preprocessing (remove text inside square and curly brackets and rem punc)
# The patterns are non-greedy so that only each bracketed span is removed;
# the greedy form also deleted ordinary text lying between two bracketed
# spans on the same line.
train_text = re.sub(r"\[.*?\]|\{.*?\}", "", train_text)
train_text = re.sub(r'[^\w\s]', "", train_text)

# set ngram number
n = 4

# pad the text and tokenize
training_data = list(pad_sequence(word_tokenize(train_text), n,
                                  pad_left=True,
                                  left_pad_symbol="<s>"))

# generate ngrams
# (named train_ngrams so the imported nltk.util.ngrams is not shadowed)
train_ngrams = list(everygrams(training_data, max_len=n))
print("Number of ngrams:", len(train_ngrams))

# build ngram language models
model = WittenBellInterpolated(n)
model.fit([train_ngrams], vocabulary_text=training_data)
print(model.vocab)

# testing data file (path to the text to be checked; set before running)
test_data_file = ""

# Read testing data
with open(test_data_file) as f:
    test_text = f.read().lower()
test_text = re.sub(r'[^\w\s]', "", test_text)

# Tokenize and pad the text
testing_data = list(pad_sequence(word_tokenize(test_text), n,
                                 pad_left=True,
                                 left_pad_symbol="<s>"))
print("Length of test data:", len(testing_data))

# assign scores: each token is scored given the n-1 items preceding it
scores = [
    model.score(item, testing_data[i:i+n-1])
    for i, item in enumerate(testing_data[n-1:])
]
if not scores:
    # Nothing to plot; previously this surfaced later as an IndexError.
    raise SystemExit("Test data contains no tokens to score.")
scores_np = np.array(scores)

# set width and height
# Height is derived from the number of scores, not len(testing_data): the
# latter also counts the n-1 pad symbols, which have no score and no label,
# and could add an empty extra row that the labels below did not cover.
width = 8
height = int(np.ceil(len(scores_np) / width))
print("Width, Height:", width, ",", height)

# copy scores to rectangular blank array
a = np.zeros(width*height)
a[:len(scores_np)] = scores_np
diff = len(a) - len(scores_np)

# apply gaussian smoothing for aesthetics
a = gaussian_filter(a, sigma=1.0)

# reshape to fit rectangle
a = a.reshape(-1, width)

# format labels: one label (row) per `width` scored tokens
labels = [" ".join(testing_data[i:i+width]) for i in range(n-1, len(testing_data), width)]
labels_individual = [x.split() for x in labels]
# pad the last row's hover data so every row has exactly `width` cells
labels_individual[-1] += [""]*diff
labels = [f"{x:60.60}" for x in labels]

# create heatmap
fig = go.Figure(data=go.Heatmap(
    z=a, x0=0, dx=1,
    y=labels, zmin=0, zmax=1,
    customdata=labels_individual,
    hovertemplate='%{customdata} <br><b>Score:%{z:.3f}<extra></extra>',
    colorscale="burg"))
fig.update_layout({"height":height*28, "width":1000, "font":{"family":"Courier New"}})
fig['layout']['yaxis']['autorange'] = "reversed"
fig.show()

View File

@@ -1,5 +1,5 @@
# Introduction: # Introduction:
_`Last Updated: May 04' 2021`_ _`Last Updated: May 13' 2021`_
Here are programs that every budding programmer who is learning to code in Python should start with. These programs are a compilation of many different types of programs and levels of programming you should try. You can use the table in "Order Of Programs" file to find the order in which it is best to program in. Here are programs that every budding programmer who is learning to code in Python should start with. These programs are a compilation of many different types of programs and levels of programming you should try. You can use the table in "Order Of Programs" file to find the order in which it is best to program in.