Stats and output files

This commit is contained in:
bt0dotninja 2020-03-03 13:21:11 -06:00
commit 4c3696ee09
4 changed files with 443 additions and 0 deletions

View file

@ -1,3 +1,9 @@
###############################################################################
# Please note that this section is required only for scraping group data with
# the python-fedora API. You do not have to fill it in if you are not going to
# pull data for the users of a particular FAS group.
###############################################################################
[FAS]
user: YourFASUser
pass: YourFASPass

20
fedstats.py Normal file
View file

@ -0,0 +1,20 @@
#!/usr/bin/env python3
# A class to get Fedora community data from the FAS interfaces and Datagrepper
# Alberto Rodriguez Sanchez bt0dotninja@fedoraproject.org
# Renato Silva resilva87@fedoraproject.org
from fedora.client.fas2 import AccountSystem
from fedora.client import AuthError
from collections import Counter
from datetime import datetime
import sys
import numpy as np
import pandas as pd
import json
import requests
import configparser

278
output.py Normal file
View file

@ -0,0 +1,278 @@
from datetime import date, timedelta
import fedmsg.meta
import fedmsg
import stats
import pygal
import math
import json
import csv
import os
# Default global variables
# Holders for sub-category / category data. Nothing in this file reads or
# writes them; presumably they are set by a caller -- TODO confirm.
subcategory_json = None
category_json = None
# Directory prefix prepended to every output file name. generate_graph()
# replaces it with "<stats.group>/" when stats.group is non-empty.
path = ''
# Base name of the generated files. NOTE(review): this is evaluated once at
# import time, so it holds whatever stats.values['user'] was at that moment;
# callers presumably overwrite it afterwards -- verify against the entry point.
filename = stats.values['user']
# One-shot flags: set to True once the start/end date header has been written
# by save_csv() / save_text_metrics() respectively.
csv_init = text_init = False
# Output mode dispatched on by generate_graph() ('svg', 'png', 'json', 'text',
# 'csv', 'markdown' or 'gource').
mode = 'text'
# Category currently being rendered; generate_graph() sets it on every call and
# None means "no category".
cat = None
def draw_svg(graph_obj):
    """Render a drawable chart object to an SVG file.

    The file name is assembled from the module-level ``path``, ``filename``
    and ``cat`` values plus ``stats.values['user']``; it is announced on
    stdout before ``graph_obj.render_to_file`` is called.
    """
    user = stats.values['user']
    if cat is not None:
        target = "%s%s/%s_%smain.svg" % (path, user, filename, cat)
    else:
        target = "%s%s/%s_main.svg" % (path, user, filename)
    print("[*] Output saved to ", target)
    graph_obj.render_to_file(target)
def draw_png(graph_obj):
    """Render a drawable chart object to a PNG file.

    The file name is assembled from the module-level ``path``, ``filename``
    and ``cat`` values plus ``stats.values['user']`` and handed to
    ``graph_obj.render_to_png``.
    """
    # Without a category the name ends in "_main.png"; with one, the category
    # is placed directly in front of "main.png".
    tail = "main.png" if cat is None else str(cat) + "main.png"
    target = "%s%s/%s_%s" % (path, stats.values['user'], filename, tail)
    graph_obj.render_to_png(target)
def draw_pie(output_json, title):
    """Build a drawable pie chart showing each key's share of the total.

    Args:
        output_json: Mapping of label -> numeric count.
        title: Chart title; converted with ``str``.

    Returns:
        A ``pygal.Pie`` with one slice per key, valued at that key's
        percentage of the total rounded to two decimals.
    """
    pie_chart = pygal.Pie(inner_radius=0.4, width=500, height=500)
    pie_chart.title = str(title)
    # The total does not change while iterating, so compute it once instead
    # of once per slice.
    total = float(sum(output_json.values()))
    for key, count in output_json.items():
        # A mapping whose counts are all zero has no meaningful shares;
        # report 0% instead of raising ZeroDivisionError.
        percent = count / total * 100 if total else 0.0
        pie_chart.add(str(key), round(percent, 2))
    return pie_chart
def draw_bar(output_json, title):
    """Build a drawable bar chart from a dictionary.

    Every key of ``output_json`` becomes one series (labelled ``str(key)``)
    carrying that key's value. Returns the ``pygal.Bar`` object.
    """
    chart = pygal.Bar(width=500, height=500)
    chart.title = str(title)
    for label, value in output_json.items():
        chart.add(str(label), value)
    return chart
def save_csv(output_json):
    """Append a CSV activity report for the user to ``<user>_main.csv``.

    Args:
        output_json: Mapping of category (or sub-category) name -> count.

    The start/end date header is written only on the first call for which
    both ``stats.start`` and ``stats.end`` are set; the module-level
    ``csv_init`` flag records that it has been emitted. When the module-level
    ``cat`` is set, row labels are prefixed with it ("Cat.Key").
    """
    global csv_init
    user = stats.values['user']
    fname = "%s%s/%s_main.csv" % (path, user, filename)
    # Loop-invariant: compute the total once rather than once per row.
    total = sum(output_json.values())

    rows = []
    # Guard on csv_init (the flag this function owns), not text_init, so the
    # date header is not repeated on every call.
    write_dates = bool(not csv_init and stats.end and stats.start)
    if write_dates:
        rows.extend([
            ['Start Date : ', stats.start],
            ['End Date : ', stats.end],
            [''],
        ])

    # Initial heading row followed by an empty spacer row
    rows.append(['Username', 'Category', 'Activity Count', 'Percentage'])
    rows.append([])
    for key, count in output_json.items():
        percent = round(count / float(total) * 100, 2) if total else 0.0
        if cat is not None and cat.capitalize() != key.capitalize():
            label = cat.capitalize() + "." + key.capitalize()
        else:
            label = key.capitalize()
        rows.append([user, label, count, str(percent) + '%'])

    # Insert blank lines and total
    rows.append([''])
    rows.append(['', 'Total : ', total])
    rows.append([''])

    # newline='' is what the csv module requires to avoid doubled line
    # endings; the context manager closes the file even if writing fails.
    with open(fname, 'a', newline='') as fout:
        csv.writer(fout).writerows(rows)
    if write_dates:
        # Only mark the header as written once it actually reached the file.
        csv_init = True
def show_gource(unicode_json):
    """Write the activities as a gource custom log and play it with gource.

    Args:
        unicode_json: Parsed datagrepper response; its ``raw_messages`` list
            supplies one log line per activity.

    The log is saved to ``<user>_main.gource`` and then streamed to the
    ``gource`` executable on stdin, highlighting ``stats.values['user']``.
    """
    # Function-scope import so this block brings its own dependency.
    import subprocess

    # Thanks Ralph. Color codes taken from fedmsg2gource
    procs = [proc.__name__.lower() for proc in fedmsg.meta.processors]
    colors = ["FFFFFF", "008F37", "FF680A", "CC4E00",
              "8F0058", "8F7E00", "37008F", "7E008F"]
    # Repeat the palette often enough that every processor gets a colour.
    n_wraps = int(math.ceil(len(procs) / float(len(colors))))
    color_lookup = dict(zip(procs, colors * n_wraps))

    highlight_user = stats.values['user']
    fname = "%s%s/%s_main.gource" % (path, highlight_user, filename)
    with open(fname, 'w') as fout:
        for activity in unicode_json['raw_messages']:
            try:
                user = list(fedmsg.meta.msg2usernames(activity))[0]
            except IndexError:
                # No user could be derived from the message: attribute it to
                # the user being analysed.
                user = highlight_user
            topic_parts = activity['topic'].split('.')
            category = topic_parts[3]
            fout.write(u"%i|%s|A|%s|%s\n" % (
                activity['timestamp'],
                user,
                topic_parts[4] + " - " + category,
                # Categories without a registered processor fall back to the
                # first palette colour instead of raising KeyError.
                color_lookup.get(category, colors[0]),
            ))

    # Pass the arguments as a list and feed the log on stdin: no shell is
    # involved, so the user name cannot be interpreted as shell syntax.
    command = ["gource", "--log-format", "custom", "--highlight-user",
               highlight_user, "-c", "0.5", "-"]
    try:
        with open(fname) as log:
            subprocess.call(command, stdin=log)
    except OSError:
        print("[!] Could not run gource. Check that it is installed.")
def save_text_log(unicode_json):
    """Save a category-wise text log of the user's activities.

    Args:
        unicode_json: Parsed datagrepper response providing ``raw_messages``
            and ``total``.

    For every category returned by ``stats.return_categories()`` the file
    ``<user>_main.txt`` receives a heading, one bullet per activity subtitle,
    the number of entries and the category's percentage of ``total``.
    """
    fname = "%s%s/%s_main.txt" % (path, stats.values['user'], filename)
    # Write text through a UTF-8 file object instead of encoding each
    # subtitle by hand: concatenating str with the bytes returned by
    # .encode() raises TypeError on Python 3.
    with open(fname, 'w', encoding='utf-8') as fout:
        # Category-wise Log
        fout.write("\n\n*** Category-wise activities ***\n\n")
        for category in stats.return_categories():
            actcount = 0
            for activity in unicode_json['raw_messages']:
                if category != activity['topic'].split('.')[3]:
                    continue
                actcount += 1
                # Print the category heading once, before its first entry
                if actcount == 1:
                    fout.write(
                        "\n\n** Category : " +
                        category.capitalize() +
                        " **\n")
                try:
                    subtitle = fedmsg.meta.msg2subtitle(activity)
                except AttributeError:
                    # Best effort, as before: skip messages that cannot be
                    # turned into a subtitle.
                    subtitle = None
                if subtitle is not None:
                    fout.write("* %s\n" % subtitle)
            fout.write("\nTotal Entries in category : " + str(actcount) + "\n")
            fout.write("\nPercentage participation in category : " +
                       str(round(100 * actcount /
                                 float(unicode_json['total']), 2)) + "\n")
def save_text_metrics(output_json):
    """Append a tab-separated metrics table for the user to ``<user>_main.txt``.

    Args:
        output_json: Mapping of category (or sub-category) name -> count.

    The start/end date header is written only on the first call for which
    both ``stats.start`` and ``stats.end`` are set; the module-level
    ``text_init`` flag records that it has been emitted. When the module-level
    ``cat`` is set, row labels are prefixed with it ("Cat.Key").
    """
    global text_init
    user = stats.values['user']
    fname = path + user + '/' + filename + '_main.txt'
    print(fname)
    # Loop-invariant: compute the total once rather than once per row.
    total = sum(output_json.values())

    chunks = []
    # Write the dates into the text report. The header must be a string:
    # passing the CSV-style list of rows to write() raises TypeError.
    write_dates = bool(not text_init and stats.end and stats.start)
    if write_dates:
        chunks.append('Start Date : %s\nEnd Date : %s\n\n'
                      % (stats.start, stats.end))

    # Initial heading row
    chunks.append('Username\t\tCategory\t\tCount\t\tPercentage\n')
    for key, count in output_json.items():
        percent = round(count / float(total) * 100, 2) if total else 0.0
        if cat is not None and cat.capitalize() != key.capitalize():
            label = cat.capitalize() + "." + key.capitalize()
        else:
            label = key.capitalize()
        chunks.append('%s\t\t%s\t\t%d\t\t%s\n' % (
            user, label, count, str(percent) + '%'))

    # Insert blank lines and total
    chunks.append('\n\n Total : %d \n' % total)

    with open(fname, 'a') as fout:
        fout.write(''.join(chunks))
    if write_dates:
        # Only mark the header as written once it actually reached the file.
        text_init = True
def save_markdown(unicode_json):
    """Save the markdown version of the category-wise text log.

    Args:
        unicode_json: Parsed datagrepper response providing ``raw_messages``
            and ``total``.

    For every category returned by ``stats.return_categories()`` the file
    ``<user>_main.md`` receives a heading, one bullet per activity subtitle,
    the number of entries and the category's percentage of ``total``.
    """
    fname = path + stats.values['user'] + '/' + filename + '_main.md'
    # Let the file object do the UTF-8 encoding (dropping unencodable
    # characters, as the original encode(errors='ignore') intended):
    # concatenating str with bytes raises TypeError on Python 3.
    with open(fname, 'w', encoding='utf-8', errors='ignore') as fout:
        # Category-wise Log, markdown ready
        fout.write("\n\n### Category-wise activities\n\n")
        for category in stats.return_categories():
            actcount = 0
            for activity in unicode_json['raw_messages']:
                if category != activity['topic'].split('.')[3]:
                    continue
                actcount += 1
                # Print the category heading once, before its first entry
                if actcount == 1:
                    fout.write(
                        "\n\n#### Category : " +
                        category.capitalize() +
                        "\n")
                try:
                    subtitle = fedmsg.meta.msg2subtitle(activity)
                except AttributeError:
                    # Best effort, as before: skip messages that cannot be
                    # turned into a subtitle.
                    subtitle = None
                if subtitle is not None:
                    fout.write("* %s\n" % subtitle)
            fout.write("\n* **Total Entries in category :** " +
                       str(actcount) + "\n")
            fout.write("\n* **Percentage participation in category :** " +
                       str(round(100 * actcount /
                                 float(unicode_json['total']), 2)) + "\n")
def save_json(unicode_json):
    """Dump ``unicode_json`` as JSON into ``<user>_main.json``.

    The file lives in the user's output directory (module-level ``path`` plus
    ``stats.values['user']``). An ``IOError`` while opening or writing is
    reported on stdout instead of being raised.
    """
    target = ''.join(
        [path, stats.values['user'], '/', filename, '_main.json'])
    try:
        with open(target, 'w') as handle:
            json.dump(unicode_json, handle)
    except IOError:
        print("[!] Could not write into directory. Check Permissions")
def generate_graph(output_json, title, category=None, gtype=None):
    """Create the output directory and emit ``output_json`` in the active mode.

    Args:
        output_json: Data to render; a name -> count mapping for the chart,
            CSV and text-metrics modes, or the raw datagrepper response for
            the json, text-log, markdown and gource modes.
        title: Chart title (used by the svg/png modes only).
        category: Category being rendered; stored in the module-level ``cat``
            that the writer functions use to build names and labels.
        gtype: Chart type for the svg/png modes: ``'pie'`` or ``'bar'``.

    The module-level ``mode`` selects the writer. Unsupported modes and chart
    types are reported on stdout.
    """
    global path, cat
    user = stats.values['user']
    if stats.group:
        path = stats.group + '/'
        # makedirs creates the group directory as well; exist_ok avoids the
        # race between checking for the directory and creating it.
        os.makedirs(path + user, exist_ok=True)
    else:
        os.makedirs(user, exist_ok=True)

    cat = category
    print('[*] Readying Output..')

    selected = mode.lower()
    if selected in ('svg', 'png'):
        builders = {'pie': draw_pie, 'bar': draw_bar}
        builder = builders.get(gtype)
        if builder is None:
            # Previously this fell through and crashed calling a render
            # method on None.
            print("[!] That graph type is not supported! Use 'pie' or 'bar'.")
            return
        graph_obj = builder(output_json, title)
        if selected == 'svg':
            draw_svg(graph_obj)
        else:
            draw_png(graph_obj)
    elif selected == 'json':
        save_json(output_json)
    elif selected == 'text':
        # stats.py declares this flag as `logs`; honour `log` when a caller
        # has set it, otherwise fall back instead of raising AttributeError.
        if getattr(stats, 'log', getattr(stats, 'logs', False)):
            save_text_log(output_json)
        else:
            save_text_metrics(output_json)
    elif selected == 'csv':
        save_csv(output_json)
    elif selected == 'markdown':
        save_markdown(output_json)
    elif selected == 'gource':
        show_gource(output_json)
    else:
        print("[!] That output mode is not supported! Check README for help.")

139
stats.py Normal file
View file

@ -0,0 +1,139 @@
import fedmsg
import fedmsg.meta
import calendar
import json
import requests
from collections import Counter
# This dictionary will be passed as param to requests later
values = dict()
# User to query; return_json() drops this key from its first request when it
# is the string 'all'.
values['user'] = None
# 604800 = 7 * 24 * 3600, i.e. one week if the unit is seconds (presumably it
# is -- confirm against the datagrepper API). return_json() deletes this key
# when both `start` and `end` are set.
values['delta'] = 604800
values['rows_per_page'] = 100
# Presumably excludes the 'meetbot' category from results -- TODO confirm.
values['not_category'] = 'meetbot'
# Page to request; return_json() increments it while paging and resets it to 1.
values['page'] = 1
values['size'] = 'small'
# Category filter; return_json() applies it only when the user is 'all'.
category = ''
# Start / end of the queried range as strings that return_epoch() can parse
# ("month/day/year"); an empty string means "not set".
start = ''
# Group name; output.generate_graph() uses it as the output directory prefix.
group = ''
end = ''
# NOTE(review): nothing visible reads `logs`; output.generate_graph() reads
# `stats.log` instead -- confirm which name the entry point sets.
logs = False
# Not referenced anywhere in the visible code.
weeks = 0
# Endpoint queried by return_json().
baseurl = "https://apps.fedoraproject.org/datagrepper/raw"
# Cache of the last parsed response, filled and returned by return_json().
unicode_json = {}
def return_epoch(time):
    """Convert a ``month/day/year`` date string to a UTC epoch timestamp.

    Args:
        time: Date such as ``'03/01/2020'``, or ``''`` for "not set".

    Returns:
        The integer number of seconds since the epoch for midnight UTC of
        that date, or ``''`` when ``time`` is the empty string.

    Raises:
        ValueError: If a component is not an integer.
        IndexError: If fewer than three components are given.
    """
    if time == '':
        return ''
    # Build a real list: on Python 3 map() returns an iterator, which cannot
    # be indexed.
    parts = [int(piece) for piece in time.split('/')]
    month, day, year = parts[0], parts[1], parts[2]
    return int(calendar.timegm((year, month, day, 0, 0, 0)))
def return_json():
    """Return the datagrepper data for ``values['user']``, fetching if needed.

    The parsed response is cached in the module-level ``unicode_json``; it is
    reused while the user recorded in the cached response matches
    ``values['user']``. On a fetch, every page of results is downloaded and
    the ``raw_messages`` of the later pages are appended to the first page's.

    Returns:
        The parsed (and page-merged) datagrepper response as a dict.
    """
    global unicode_json

    if unicode_json:
        cached_users = unicode_json['arguments']['users']
        # An empty list presumably means the request carried no user filter,
        # i.e. user 'all' (verify against the API); indexing it would raise.
        cached_user = cached_users[0] if cached_users else 'all'
        if cached_user == values['user']:
            return unicode_json

    print('[*] Grabbing datagrepper values for user ' + values['user'] + '..')

    # If the user is set as all, we filter it using the provided category,
    # if any
    if category != '' and values['user'] == 'all':
        values['category'] = category
    if start != '' and end != '':
        values['start'] = return_epoch(start)
        values['end'] = return_epoch(end)
        # pop() instead of del: a previous fetch may already have removed it.
        values.pop('delta', None)

    # Work on a copy so that every request -- not only the first -- omits the
    # user filter when the user is 'all'.
    params = dict(values)
    if params['user'] == 'all':
        del params['user']
    params['page'] = 1

    response = requests.get(baseurl, params=params)
    unicode_json = json.loads(response.text)
    total_pages = unicode_json['pages']
    print("Total pages found : " + str(total_pages))

    # Page 1 is already loaded; fetch pages 2..total_pages and merge them.
    for page in range(2, total_pages + 1):
        print(" [*] Loading Page " + str(page) + "/" + str(total_pages))
        params['page'] = page
        response = requests.get(baseurl, params=params)
        paginated_json = json.loads(response.text)
        unicode_json['raw_messages'].extend(paginated_json['raw_messages'])

    values['page'] = 1
    return unicode_json
def return_categories():
    """Count the activities per category.

    Obtains the data through ``return_json()`` and returns a ``Counter``
    keyed by the 4th dot-separated word of each message topic.
    """
    data = return_json()
    print("[*] Identifying Categories..")
    return Counter(
        activity['topic'].split('.')[3]
        for activity in data['raw_messages']
    )
def return_subcategories(category):
    """Count the sub-categories seen within ``category``.

    Scans the cached ``unicode_json`` messages; for each topic whose 4th
    dot-separated word equals ``category``, the 5th word is counted. Returns
    the resulting ``Counter``.
    """
    print("[*] Identifying sub-categories..")
    topics = (
        activity['topic'].split('.')
        for activity in unicode_json['raw_messages']
    )
    return Counter(words[4] for words in topics if category == words[3])
def return_interactions(subcategories):
    """Count the interactions found beneath each sub-category.

    For every cached message whose 5th topic word is one of ``subcategories``
    and whose 6th word is non-empty, the 6th word is recorded under that
    sub-category. Returns a dict mapping each sub-category to a ``Counter`` of
    its interactions. As soon as a topic turns out to be too short to index,
    a notice is printed and ``{None: None}`` is returned instead.
    """
    # Start every sub-category with an empty bucket
    buckets = {subcat: [] for subcat in subcategories}

    # Gathering sub-sub-categories
    for activity in unicode_json['raw_messages']:
        for subcat in subcategories:
            try:
                words = activity['topic'].split('.')
                if subcat == words[4] and words[5]:
                    buckets[subcat].append(words[5])
            except IndexError:
                print("[!] That category doesn't have any more interactions!")
                return {None: None}

    # Turn each bucket into a counter
    return {subcat: Counter(found) for subcat, found in buckets.items()}