How to use redis to cache the list of relevant articles as values mapped with the articles' id as key in django?
How to use redis to cache the list of relevant articles as values mapped with the articles' id as key in django?
models.py
from django.db import models
from django_pandas.managers import DataFrameManager
# Create your models here.
class BcContent(models.Model):
asset_id = models.PositiveIntegerField()
title = models.CharField(max_length=255)
alias = models.CharField(max_length=255)
title_alias = models.CharField(max_length=255)
introtext = models.TextField()
state = models.IntegerField()
sponsored = models.IntegerField()
sectionid = models.PositiveIntegerField()
mask = models.PositiveIntegerField()
catid = models.PositiveIntegerField()
created = models.DateTimeField()
created_by = models.PositiveIntegerField()
created_by_alias = models.CharField(max_length=255)
modified = models.DateTimeField()
modified_by = models.PositiveIntegerField()
checked_out = models.PositiveIntegerField()
checked_out_time = models.DateTimeField()
publish_up = models.DateTimeField()
publish_down = models.DateTimeField()
images = models.TextField()
urls = models.TextField()
attribs = models.CharField(max_length=5120)
version = models.PositiveIntegerField()
parentid = models.PositiveIntegerField()
ordering = models.IntegerField()
metakey = models.TextField()
metadesc = models.TextField()
access = models.PositiveIntegerField()
hits = models.PositiveIntegerField()
metadata = models.TextField()
featured = models.PositiveIntegerField()
language = models.CharField(max_length=7)
xreference = models.CharField(max_length=50)
admin_push = models.IntegerField()
author_id = models.PositiveIntegerField(blank=True, null=True)
meta_title = models.CharField(max_length=255)
og_title = models.CharField(max_length=255)
og_description = models.TextField()
lifestage_period = models.CharField(max_length=255)
fb_post_id = models.CharField(max_length=255)
is_instant_article = models.IntegerField()
instant_article_text = models.TextField()
objects = DataFrameManager()
class Meta:
managed = False
db_table = 'bc_content'
Views.py
from django.shortcuts import HttpResponse
from .models import BcContent
import os
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))
import functools
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords =True)
from django.conf import settings
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from django.core.cache import cache
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from string import punctuation
from sklearn.metrics.pairwise import linear_kernel
from django.http import Http404
import numpy as np
stop_words =
f = open(os.path.join(settings.PROJECT_ROOT, 'stopwords.txt'))
#f = open('stopwords.txt', 'r')
for l in f.readlines():
stop_words.append(l.replace('n', ''))
additional_stop_words = ['t','aah','aap','span','nbsp','don','doesn','isn','ve','ll','add', 'ndash','will','nan','q','article','lsquo','rsquo','ldquo','rdquo','personalised','please','read','download','app','here','more','experience','based','explore','bull','fact','myth','ndash','middot','lifestage','entire','collection','articles','reading','website','android','phone','a','zero','value',]
stop_words += additional_stop_words
stop_words = list(filter(None, stop_words))
def _removeNonAscii(s):
return "".join(i for i in s if ord(i)<128)
def clean_text(text):
text = text.lower()
cleanr = re.compile('<.*?>')
text = re.sub(cleanr, '', text)
text = re.sub(r"what's", "what is ", text)
text = text.replace('(ap)', '')
text = re.sub(r"'s", " is ", text)
text = re.sub(r"'ve", " have ", text)
text = re.sub(r"can't", "cannot ", text)
text = re.sub(r"n't", " not ", text)
text = re.sub(r"i'm", "i am ", text)
text = re.sub(r"'re", " are ", text)
text = re.sub(r"'d", " would ", text)
text = re.sub(r"'ll", " will ", text)
text = re.sub(r'W+', ' ', text)
text = re.sub(r's+', ' ', text)
text = re.sub(r"", "", text)
text = re.sub(r"'", "", text)
text = re.sub(r""", "", text)
text = re.sub('[^a-zA-Z ?!]+', '', text)
text = _removeNonAscii(text)
text = text.strip()
return text
def tokenizer(text):
text = clean_text(text)
tokens = [word_tokenize(sent) for sent in sent_tokenize(text)]
tokens = list(functools.reduce(lambda x,y: x+y, tokens))
tokens = list(filter(lambda token: token not in (stop_words + list(punctuation)) , tokens))
return tokens
def preprocess(df):
df['combined'] = df[['title','metakey', 'metadesc','introtext']].apply(lambda x: ' '.join(x), axis=1)
df.drop(['title', 'metakey', 'metadesc', 'introtext'], axis = 1, inplace = True)
#df.combined = df.combined.str.replace('[^ws]',' ')
#df['combined'] = df['combined'].str.replace('d+', ' ')
#df.combined = df.combined.str.replace('nbsp?' , ' ')
#df.combined = df.combined.str.replace('value?' , ' ')
df = df.dropna(subset = ['combined'])
#df.combined = df.combined.str.replace('s+', ' ')
df = df[(df.combined.map(len) > 600)]
#df.reset_index(inplace=True, drop=True)
df['tokens'] = ''
df['tokens'] = df['combined'].progress_map(lambda d: tokenizer(d))
df['text_stemmed']=df['tokens'].apply(lambda x : [stemmer.stem(y) for y in x])
df.drop(['combined', 'tokens'], axis = 1, inplace = True)
#df = df.iloc[:,1:2]
df.set_index('id', inplace=True)
#df['text_stemmed_sentence']=df['text_stemmed'].apply(lambda x : " ".join(x))
#df = df[(df.text_stemmed_sentence.map(len) > 600)]
#df.reset_index(inplace=True, drop=True)
#df['stemmed_tokens'] = df['text_stemmed_sentence'].progress_map(lambda d: tokenizer(d))
#df1 = df['stemmed_tokens']
return df
#@api_view(['GET'])
def detail(request, index):
if index in cache:
cache.get('index')
#return HttpResponse("1")
else:
qs = BcContent.objects.all()
df = qs.to_dataframe(fieldnames = ['id','title','metakey','metadesc','introtext'])
df1 = preprocess(df)
vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
vz = vectorizer.fit_transform(list(df1['text_stemmed'].map(lambda tokens: ' '.join(tokens))))
cosine_similarities = linear_kernel(vz,vz)
articlesRecommend = pd.DataFrame(cosine_similarities, columns = df1.index, index = df1.index)
# for c in articlesRecommend:
# y = np.array([articlesRecommend[c].nlargest(101).index.values])
#cache.set(, y, timeout = None)
#result = cache.get('index')
y = np.array([articlesRecommend[c].nlargest(101).index.values for c in articlesRecommend])
articles_df = pd.DataFrame(data = y, index = articlesRecommend.columns)
recommend = articles_df.iloc[:,1:100].T
for i in range(len(recommend['id'])):
cache.set(recommend['index'][i], recommend['id'][i], timeout = None)
#cache.set(df1.index, recommend, timeout = None)
return HttpResponse(recommend.to_html())
I need help in this file. I just need to save my list of recommended list of articles for each article id on redis and from there the list would be parsed to production
Please help in setting the values on redis. how to get the list of relevant articles after passing the article id from the url??
url.py
from django.urls import path
from . import views
urlpatterns = [
path('<int:index>',views.detail, name='art_rec'),
]
By clicking "Post Your Answer", you acknowledge that you have read our updated terms of service, privacy policy and cookie policy, and that your continued use of the website is subject to these policies.