How to use Redis in Django to cache the list of relevant articles as values, keyed by the article's ID?



models.py


from django.db import models
from django_pandas.managers import DataFrameManager

# Create your models here.
class BcContent(models.Model):
    asset_id = models.PositiveIntegerField()
    title = models.CharField(max_length=255)
    alias = models.CharField(max_length=255)
    title_alias = models.CharField(max_length=255)
    introtext = models.TextField()
    state = models.IntegerField()
    sponsored = models.IntegerField()
    sectionid = models.PositiveIntegerField()
    mask = models.PositiveIntegerField()
    catid = models.PositiveIntegerField()
    created = models.DateTimeField()
    created_by = models.PositiveIntegerField()
    created_by_alias = models.CharField(max_length=255)
    modified = models.DateTimeField()
    modified_by = models.PositiveIntegerField()
    checked_out = models.PositiveIntegerField()
    checked_out_time = models.DateTimeField()
    publish_up = models.DateTimeField()
    publish_down = models.DateTimeField()
    images = models.TextField()
    urls = models.TextField()
    attribs = models.CharField(max_length=5120)
    version = models.PositiveIntegerField()
    parentid = models.PositiveIntegerField()
    ordering = models.IntegerField()
    metakey = models.TextField()
    metadesc = models.TextField()
    access = models.PositiveIntegerField()
    hits = models.PositiveIntegerField()
    metadata = models.TextField()
    featured = models.PositiveIntegerField()
    language = models.CharField(max_length=7)
    xreference = models.CharField(max_length=50)
    admin_push = models.IntegerField()
    author_id = models.PositiveIntegerField(blank=True, null=True)
    meta_title = models.CharField(max_length=255)
    og_title = models.CharField(max_length=255)
    og_description = models.TextField()
    lifestage_period = models.CharField(max_length=255)
    fb_post_id = models.CharField(max_length=255)
    is_instant_article = models.IntegerField()
    instant_article_text = models.TextField()

    objects = DataFrameManager()

    class Meta:
        managed = False
        db_table = 'bc_content'



views.py


from django.shortcuts import HttpResponse
from .models import BcContent
import os
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))
import functools
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords=True)
from django.conf import settings
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from django.core.cache import cache
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from string import punctuation
from sklearn.metrics.pairwise import linear_kernel
from django.http import Http404
import numpy as np
from tqdm import tqdm
tqdm.pandas()  # registers DataFrame.progress_map, used in preprocess() below

stop_words = []
f = open(os.path.join(settings.PROJECT_ROOT, 'stopwords.txt'))

#f = open('stopwords.txt', 'r')
for l in f.readlines():
    stop_words.append(l.replace('\n', ''))
f.close()

additional_stop_words = ['t','aah','aap','span','nbsp','don','doesn','isn','ve','ll','add','ndash','will','nan','q','article','lsquo','rsquo','ldquo','rdquo','personalised','please','read','download','app','here','more','experience','based','explore','bull','fact','myth','middot','lifestage','entire','collection','articles','reading','website','android','phone','a','zero','value']
stop_words += additional_stop_words
stop_words = list(filter(None, stop_words))

def _removeNonAscii(s):
    return "".join(i for i in s if ord(i) < 128)

def clean_text(text):
    text = text.lower()
    cleanr = re.compile('<.*?>')
    text = re.sub(cleanr, '', text)
    text = re.sub(r"what's", "what is ", text)
    text = text.replace('(ap)', '')
    text = re.sub(r"'s", " is ", text)
    text = re.sub(r"'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"'re", " are ", text)
    text = re.sub(r"'d", " would ", text)
    text = re.sub(r"'ll", " will ", text)
    text = re.sub(r'\W+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"’", "", text)   # strip curly quotes left over from CMS content
    text = re.sub(r"'", "", text)
    text = re.sub(r"“", "", text)
    text = re.sub('[^a-zA-Z ?!]+', '', text)
    text = _removeNonAscii(text)
    text = text.strip()
    return text

def tokenizer(text):
    text = clean_text(text)
    tokens = [word_tokenize(sent) for sent in sent_tokenize(text)]
    tokens = list(functools.reduce(lambda x, y: x + y, tokens))
    tokens = list(filter(lambda token: token not in (stop_words + list(punctuation)), tokens))
    return tokens
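
For a quick sanity check, this is roughly what the cleaning pipeline yields on a sample string. The output shown is a hypothetical example: the exact tokens depend on what stopwords.txt contains (here I assume the usual English stop words are in it):

>>> tokenizer("What's the <b>best</b> way to cache articles in Redis?")
['best', 'way', 'cache', 'articles', 'redis']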

def preprocess(df):
    df['combined'] = df[['title', 'metakey', 'metadesc', 'introtext']].apply(lambda x: ' '.join(x), axis=1)
    df.drop(['title', 'metakey', 'metadesc', 'introtext'], axis=1, inplace=True)
    #df.combined = df.combined.str.replace('[^\w\s]', ' ')
    #df['combined'] = df['combined'].str.replace('\d+', ' ')
    #df.combined = df.combined.str.replace('nbsp?', ' ')
    #df.combined = df.combined.str.replace('value?', ' ')
    df = df.dropna(subset=['combined'])
    #df.combined = df.combined.str.replace('\s+', ' ')
    df = df[(df.combined.map(len) > 600)]
    #df.reset_index(inplace=True, drop=True)
    df['tokens'] = df['combined'].progress_map(lambda d: tokenizer(d))
    df['text_stemmed'] = df['tokens'].apply(lambda x: [stemmer.stem(y) for y in x])
    df.drop(['combined', 'tokens'], axis=1, inplace=True)
    #df = df.iloc[:,1:2]
    df.set_index('id', inplace=True)
    #df['text_stemmed_sentence'] = df['text_stemmed'].apply(lambda x: " ".join(x))
    #df = df[(df.text_stemmed_sentence.map(len) > 600)]
    #df['stemmed_tokens'] = df['text_stemmed_sentence'].progress_map(lambda d: tokenizer(d))
    return df


#@api_view(['GET'])

def detail(request, index):
    if index in cache:
        cache.get('index')
        #return HttpResponse("1")
    else:
        qs = BcContent.objects.all()
        df = qs.to_dataframe(fieldnames=['id', 'title', 'metakey', 'metadesc', 'introtext'])
        df1 = preprocess(df)
        vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
        vz = vectorizer.fit_transform(list(df1['text_stemmed'].map(lambda tokens: ' '.join(tokens))))
        cosine_similarities = linear_kernel(vz, vz)
        articlesRecommend = pd.DataFrame(cosine_similarities, columns=df1.index, index=df1.index)
        # for c in articlesRecommend:
        #     y = np.array([articlesRecommend[c].nlargest(101).index.values])
        #     cache.set(, y, timeout=None)
        #result = cache.get('index')

        y = np.array([articlesRecommend[c].nlargest(101).index.values for c in articlesRecommend])
        articles_df = pd.DataFrame(data=y, index=articlesRecommend.columns)
        recommend = articles_df.iloc[:, 1:100].T
        for i in range(len(recommend['id'])):
            cache.set(recommend['index'][i], recommend['id'][i], timeout=None)
        #cache.set(df1.index, recommend, timeout=None)
        return HttpResponse(recommend.to_html())



I need help with this file. I just need to save the list of recommended articles for each article ID in Redis, from where the list would be served to production.
Please help with setting the values in Redis: how do I get the list of relevant articles after passing the article ID in the URL?
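
For reference, here is a minimal sketch of one way to wire this up, assuming django-redis is configured as the default cache backend (see the settings sketch after urls.py below). It reuses the imports and preprocess() already defined in views.py. The 'rec:' key prefix, the _build_recommendations() helper and the rebuild-on-miss flow are illustrative assumptions, not part of the original code:

def _build_recommendations():
    # Recompute pairwise similarities and cache the top 100 related IDs per article.
    qs = BcContent.objects.all()
    df = qs.to_dataframe(fieldnames=['id', 'title', 'metakey', 'metadesc', 'introtext'])
    df1 = preprocess(df)
    vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
    vz = vectorizer.fit_transform(df1['text_stemmed'].map(' '.join))
    sims = pd.DataFrame(linear_kernel(vz, vz), columns=df1.index, index=df1.index)
    for article_id in sims.columns:
        # nlargest(101) puts the article itself at rank 0, so drop it
        top_ids = sims[article_id].nlargest(101).index.values[1:].tolist()
        cache.set('rec:%s' % article_id, top_ids, timeout=None)  # one key per article ID

def detail(request, index):
    recommended = cache.get('rec:%s' % index)
    if recommended is None:
        _build_recommendations()   # cache miss: rebuild everything once
        recommended = cache.get('rec:%s' % index)
    if recommended is None:
        raise Http404("No recommendations for article %s" % index)
    return HttpResponse(', '.join(str(i) for i in recommended))

django-redis pickles Python values by default, so the list of IDs round-trips through cache.set()/cache.get() unchanged, and timeout=None keeps each key until the next rebuild overwrites it.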



urls.py


from django.urls import path

from . import views

urlpatterns = [
    path('<int:index>', views.detail, name='art_rec'),
]
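
Note that django.core.cache only talks to Redis if a Redis backend is configured in settings.py. A minimal sketch using the django-redis package (the host, port and database number below are placeholders to adapt):

# settings.py -- host/port/db are placeholders
CACHES = {
    'default': {
        'BACKEND': 'django_redis.cache.RedisCache',
        'LOCATION': 'redis://127.0.0.1:6379/1',
        'OPTIONS': {
            'CLIENT_CLASS': 'django_redis.client.DefaultClient',
        },
    }
}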








